72#include "llvm/IR/IntrinsicsAArch64.h"
108#define DEBUG_TYPE "aarch64-lower"
111STATISTIC(NumShiftInserts,
"Number of vector shift inserts");
112STATISTIC(NumOptimizedImms,
"Number of times immediates were optimized");
119 cl::desc(
"Allow AArch64 Local Dynamic TLS code generation"),
124 cl::desc(
"Enable AArch64 logical imm instruction "
134 cl::desc(
"Combine extends of AArch64 masked "
135 "gather intrinsics"),
139 cl::desc(
"Combine ext and trunc to TBL"),
152 AArch64::X3, AArch64::X4, AArch64::X5,
153 AArch64::X6, AArch64::X7};
155 AArch64::Q3, AArch64::Q4, AArch64::Q5,
156 AArch64::Q6, AArch64::Q7};
181 return MVT::nxv8bf16;
188 switch (EC.getKnownMinValue()) {
204 "Expected scalable predicate vector type!");
226 "Expected legal vector type!");
272 switch (
Op.getOpcode()) {
283 switch (
Op.getConstantOperandVal(0)) {
286 case Intrinsic::aarch64_sve_ptrue:
287 case Intrinsic::aarch64_sve_pnext:
288 case Intrinsic::aarch64_sve_cmpeq:
289 case Intrinsic::aarch64_sve_cmpne:
290 case Intrinsic::aarch64_sve_cmpge:
291 case Intrinsic::aarch64_sve_cmpgt:
292 case Intrinsic::aarch64_sve_cmphs:
293 case Intrinsic::aarch64_sve_cmphi:
294 case Intrinsic::aarch64_sve_cmpeq_wide:
295 case Intrinsic::aarch64_sve_cmpne_wide:
296 case Intrinsic::aarch64_sve_cmpge_wide:
297 case Intrinsic::aarch64_sve_cmpgt_wide:
298 case Intrinsic::aarch64_sve_cmplt_wide:
299 case Intrinsic::aarch64_sve_cmple_wide:
300 case Intrinsic::aarch64_sve_cmphs_wide:
301 case Intrinsic::aarch64_sve_cmphi_wide:
302 case Intrinsic::aarch64_sve_cmplo_wide:
303 case Intrinsic::aarch64_sve_cmpls_wide:
304 case Intrinsic::aarch64_sve_fcmpeq:
305 case Intrinsic::aarch64_sve_fcmpne:
306 case Intrinsic::aarch64_sve_fcmpge:
307 case Intrinsic::aarch64_sve_fcmpgt:
308 case Intrinsic::aarch64_sve_fcmpuo:
309 case Intrinsic::aarch64_sve_facgt:
310 case Intrinsic::aarch64_sve_facge:
311 case Intrinsic::aarch64_sve_whilege:
312 case Intrinsic::aarch64_sve_whilegt:
313 case Intrinsic::aarch64_sve_whilehi:
314 case Intrinsic::aarch64_sve_whilehs:
315 case Intrinsic::aarch64_sve_whilele:
316 case Intrinsic::aarch64_sve_whilelo:
317 case Intrinsic::aarch64_sve_whilels:
318 case Intrinsic::aarch64_sve_whilelt:
319 case Intrinsic::aarch64_sve_match:
320 case Intrinsic::aarch64_sve_nmatch:
321 case Intrinsic::aarch64_sve_whilege_x2:
322 case Intrinsic::aarch64_sve_whilegt_x2:
323 case Intrinsic::aarch64_sve_whilehi_x2:
324 case Intrinsic::aarch64_sve_whilehs_x2:
325 case Intrinsic::aarch64_sve_whilele_x2:
326 case Intrinsic::aarch64_sve_whilelo_x2:
327 case Intrinsic::aarch64_sve_whilels_x2:
328 case Intrinsic::aarch64_sve_whilelt_x2:
348 if (Subtarget->hasLS64()) {
354 if (Subtarget->hasFPARMv8()) {
362 if (Subtarget->hasNEON()) {
366 addDRTypeForNEON(MVT::v2f32);
367 addDRTypeForNEON(MVT::v8i8);
368 addDRTypeForNEON(MVT::v4i16);
369 addDRTypeForNEON(MVT::v2i32);
370 addDRTypeForNEON(MVT::v1i64);
371 addDRTypeForNEON(MVT::v1f64);
372 addDRTypeForNEON(MVT::v4f16);
373 if (Subtarget->hasBF16())
374 addDRTypeForNEON(MVT::v4bf16);
376 addQRTypeForNEON(MVT::v4f32);
377 addQRTypeForNEON(MVT::v2f64);
378 addQRTypeForNEON(MVT::v16i8);
379 addQRTypeForNEON(MVT::v8i16);
380 addQRTypeForNEON(MVT::v4i32);
381 addQRTypeForNEON(MVT::v2i64);
382 addQRTypeForNEON(MVT::v8f16);
383 if (Subtarget->hasBF16())
384 addQRTypeForNEON(MVT::v8bf16);
408 if (Subtarget->hasBF16()) {
425 if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
598 if (Subtarget->hasCSSC()) {
677 if (Subtarget->hasFullFP16())
695 if (!Subtarget->hasFullFP16()) {
778 for (
MVT Ty : {MVT::f32, MVT::f64})
780 if (Subtarget->hasFullFP16())
787 for (
MVT Ty : {MVT::f32, MVT::f64})
789 if (Subtarget->hasFullFP16())
794 for (
auto VT : {MVT::f32, MVT::f64})
803 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
815 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
841#define LCALLNAMES(A, B, N) \
842 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
843 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
844 setLibcallName(A##N##_REL, #B #N "_rel"); \
845 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
846#define LCALLNAME4(A, B) \
847 LCALLNAMES(A, B, 1) \
848 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
849#define LCALLNAME5(A, B) \
850 LCALLNAMES(A, B, 1) \
851 LCALLNAMES(A, B, 2) \
852 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
853 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
854 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
855 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
856 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
857 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
858 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
864 if (Subtarget->hasLSE128()) {
878 if (Subtarget->hasLSE2()) {
1076 if (Subtarget->hasNEON()) {
1113 for (
auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
1116 if (Subtarget->hasFullFP16()) {
1149 for (
auto VT : {MVT::v1i64, MVT::v2i64}) {
1165 for (
MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1166 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1173 for (
MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1184 for (
MVT VT : { MVT::v4f16, MVT::v2f32,
1185 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1186 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1195 for (
MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1196 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1218 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1245 for (
MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1247 if (Subtarget->hasFullFP16())
1248 for (
MVT Ty : {MVT::v4f16, MVT::v8f16})
1271 for (
MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1274 for (
MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1278 if (Subtarget->hasSME()) {
1286 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1295 for (
auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1368 for (
auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1374 for (
auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
1375 MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
1379 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
1380 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
1384 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1398 if (VT != MVT::nxv16i1) {
1405 for (
auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1406 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1407 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1448 for (
auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1449 MVT::nxv4f32, MVT::nxv2f64}) {
1521 for (
auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1537 for (
auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1538 MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1550 {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
1564 for (
MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1565 MVT::v4i32, MVT::v1i64, MVT::v2i64})
1566 addTypeForFixedLengthSVE(VT,
true);
1569 {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
1570 addTypeForFixedLengthSVE(VT,
true);
1578 addTypeForFixedLengthSVE(VT,
false);
1581 addTypeForFixedLengthSVE(VT,
false);
1584 for (
auto VT : {MVT::v8i8, MVT::v4i16})
1589 for (
auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1591 for (
auto VT : {MVT::v8f16, MVT::v4f32})
1617 for (
auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1618 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1630 for (
auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1642 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
1649 if (Subtarget->hasSVE()) {
1698void AArch64TargetLowering::addTypeForNEON(
MVT VT) {
1708 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1721 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1722 ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1745 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1754 for (
unsigned Opcode :
1772 for (
unsigned Opcode :
1808 if (Subtarget->hasD128()) {
1817 if (!Subtarget->hasSVE())
1822 if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
1823 ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
1824 ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
1828 if (OpVT != MVT::i32 && OpVT != MVT::i64)
1835 return !Subtarget->
hasSVEorSME() || VT != MVT::nxv16i1;
1838void AArch64TargetLowering::addTypeForFixedLengthSVE(
MVT VT,
1839 bool StreamingSVE) {
1861 while (InnerVT != VT) {
1874 while (InnerVT != VT) {
1973void AArch64TargetLowering::addDRTypeForNEON(
MVT VT) {
1978void AArch64TargetLowering::addQRTypeForNEON(
MVT VT) {
1996 Imm =
C->getZExtValue();
2007 return N->getOpcode() == Opc &&
2012 const APInt &Demanded,
2015 uint64_t OldImm = Imm, NewImm, Enc;
2020 if (Imm == 0 || Imm == Mask ||
2024 unsigned EltSize =
Size;
2041 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
2043 uint64_t Sum = RotatedImm + NonDemandedBits;
2044 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
2045 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
2046 NewImm = (Imm | Ones) & Mask;
2074 while (EltSize <
Size) {
2075 NewImm |= NewImm << EltSize;
2081 "demanded bits should never be altered");
2082 assert(OldImm != NewImm &&
"the new imm shouldn't be equal to the old imm");
2085 EVT VT =
Op.getValueType();
2091 if (NewImm == 0 || NewImm == OrigMask) {
2116 EVT VT =
Op.getValueType();
2122 "i32 or i64 is expected after legalization.");
2129 switch (
Op.getOpcode()) {
2133 NewOpc =
Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
2136 NewOpc =
Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
2139 NewOpc =
Size == 32 ? AArch64::EORWri : AArch64::EORXri;
2154 switch (
Op.getOpcode()) {
2160 if (
SrcOp.getValueSizeInBits() !=
Op.getScalarValueSizeInBits()) {
2161 assert(
SrcOp.getValueSizeInBits() >
Op.getScalarValueSizeInBits() &&
2162 "Expected DUP implicit truncation");
2163 Known = Known.
trunc(
Op.getScalarValueSizeInBits());
2177 ~(
Op->getConstantOperandVal(1) <<
Op->getConstantOperandVal(2));
2226 case Intrinsic::aarch64_ldaxr:
2227 case Intrinsic::aarch64_ldxr: {
2229 EVT VT = cast<MemIntrinsicSDNode>(
Op)->getMemoryVT();
2239 unsigned IntNo =
Op.getConstantOperandVal(0);
2243 case Intrinsic::aarch64_neon_uaddlv: {
2244 MVT VT =
Op.getOperand(1).getValueType().getSimpleVT();
2246 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2247 unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
2254 case Intrinsic::aarch64_neon_umaxv:
2255 case Intrinsic::aarch64_neon_uminv: {
2260 MVT VT =
Op.getOperand(1).getValueType().getSimpleVT();
2262 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2266 }
else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
2280 unsigned Depth)
const {
2281 EVT VT =
Op.getValueType();
2283 unsigned Opcode =
Op.getOpcode();
2317 unsigned *
Fast)
const {
2318 if (Subtarget->requiresStrictAlign())
2323 *
Fast = !Subtarget->isMisaligned128StoreSlow() || VT.
getStoreSize() != 16 ||
2342 unsigned *
Fast)
const {
2343 if (Subtarget->requiresStrictAlign())
2348 *
Fast = !Subtarget->isMisaligned128StoreSlow() ||
2372#define MAKE_CASE(V) \
2719 Register DestReg =
MI.getOperand(0).getReg();
2720 Register IfTrueReg =
MI.getOperand(1).getReg();
2721 Register IfFalseReg =
MI.getOperand(2).getReg();
2722 unsigned CondCode =
MI.getOperand(3).getImm();
2723 bool NZCVKilled =
MI.getOperand(4).isKill();
2754 MI.eraseFromParent();
2762 "SEH does not use catchret!");
2774 Register TargetReg =
MI.getOperand(0).getReg();
2776 TII.probedStackAlloc(
MBBI, TargetReg,
false);
2778 MI.eraseFromParent();
2779 return NextInst->getParent();
2790 MIB.
add(
MI.getOperand(1));
2791 MIB.
add(
MI.getOperand(2));
2792 MIB.
add(
MI.getOperand(3));
2793 MIB.
add(
MI.getOperand(4));
2794 MIB.
add(
MI.getOperand(5));
2796 MI.eraseFromParent();
2807 MIB.
add(
MI.getOperand(0));
2808 MIB.
add(
MI.getOperand(1));
2809 MIB.
add(
MI.getOperand(2));
2810 MIB.
add(
MI.getOperand(1));
2812 MI.eraseFromParent();
2819 bool Op0IsDef)
const {
2825 for (
unsigned I = 1;
I <
MI.getNumOperands(); ++
I)
2826 MIB.
add(
MI.getOperand(
I));
2828 MI.eraseFromParent();
2838 unsigned StartIdx = 0;
2842 MIB.
addReg(BaseReg +
MI.getOperand(0).getImm());
2847 for (
unsigned I = StartIdx;
I <
MI.getNumOperands(); ++
I)
2848 MIB.
add(
MI.getOperand(
I));
2850 MI.eraseFromParent();
2859 MIB.
add(
MI.getOperand(0));
2861 unsigned Mask =
MI.getOperand(0).getImm();
2862 for (
unsigned I = 0;
I < 8;
I++) {
2863 if (Mask & (1 <<
I))
2867 MI.eraseFromParent();
2875 if (SMEOrigInstr != -1) {
2879 switch (SMEMatrixType) {
2881 return EmitZAInstr(SMEOrigInstr, AArch64::ZA,
MI, BB,
false);
2883 return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0,
MI, BB,
true);
2885 return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0,
MI, BB,
true);
2887 return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0,
MI, BB,
true);
2889 return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0,
MI, BB,
true);
2891 return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0,
MI, BB,
true);
2895 switch (
MI.getOpcode()) {
2902 case AArch64::F128CSEL:
2904 case TargetOpcode::STATEPOINT:
2910 MI.addOperand(*
MI.getMF(),
2916 case TargetOpcode::STACKMAP:
2917 case TargetOpcode::PATCHPOINT:
2920 case TargetOpcode::PATCHABLE_EVENT_CALL:
2921 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2924 case AArch64::CATCHRET:
2927 case AArch64::PROBED_STACKALLOC_DYN:
2930 case AArch64::LD1_MXIPXX_H_PSEUDO_B:
2931 return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0,
MI, BB);
2932 case AArch64::LD1_MXIPXX_H_PSEUDO_H:
2933 return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0,
MI, BB);
2934 case AArch64::LD1_MXIPXX_H_PSEUDO_S:
2935 return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0,
MI, BB);
2936 case AArch64::LD1_MXIPXX_H_PSEUDO_D:
2937 return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0,
MI, BB);
2938 case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
2939 return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0,
MI, BB);
2940 case AArch64::LD1_MXIPXX_V_PSEUDO_B:
2941 return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0,
MI, BB);
2942 case AArch64::LD1_MXIPXX_V_PSEUDO_H:
2943 return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0,
MI, BB);
2944 case AArch64::LD1_MXIPXX_V_PSEUDO_S:
2945 return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0,
MI, BB);
2946 case AArch64::LD1_MXIPXX_V_PSEUDO_D:
2947 return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0,
MI, BB);
2948 case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
2949 return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0,
MI, BB);
2950 case AArch64::LDR_ZA_PSEUDO:
2952 case AArch64::LDR_TX_PSEUDO:
2954 case AArch64::STR_TX_PSEUDO:
2956 case AArch64::ZERO_M_PSEUDO:
2958 case AArch64::ZERO_T_PSEUDO:
2984 N =
N->getOperand(0).getNode();
2992 auto Opnd0 =
N->getOperand(0);
3145 CondCode, CondCode2);
3152 bool IsLegal = (
C >> 12 == 0) || ((
C & 0xFFFULL) == 0 &&
C >> 24 == 0);
3154 <<
" legal: " << (IsLegal ?
"yes\n" :
"no\n"));
3176 EVT VT =
LHS.getValueType();
3181 if (VT == MVT::f16 && !FullFP16) {
3186 Chain =
RHS.getValue(1);
3191 return DAG.
getNode(Opcode, dl, {VT, MVT::Other}, {Chain,
LHS,
RHS});
3196 EVT VT =
LHS.getValueType();
3201 if (VT == MVT::f16 && !FullFP16) {
3238 return LHS.getValue(1);
3304 unsigned Opcode = 0;
3307 if (
LHS.getValueType().isFloatingPoint()) {
3308 assert(
LHS.getValueType() != MVT::f128);
3309 if (
LHS.getValueType() == MVT::f16 && !FullFP16) {
3347 bool &MustBeFirst,
bool WillNegate,
3348 unsigned Depth = 0) {
3356 MustBeFirst =
false;
3363 bool IsOR = Opcode ==
ISD::OR;
3375 if (MustBeFirstL && MustBeFirstR)
3381 if (!CanNegateL && !CanNegateR)
3385 CanNegate = WillNegate && CanNegateL && CanNegateR;
3388 MustBeFirst = !CanNegate;
3393 MustBeFirst = MustBeFirstL || MustBeFirstR;
3417 bool isInteger =
LHS.getValueType().isInteger();
3419 CC = getSetCCInverse(
CC,
LHS.getValueType());
3425 assert(
LHS.getValueType().isFloatingPoint());
3438 Predicate = ExtraCC;
3451 bool IsOR = Opcode ==
ISD::OR;
3457 assert(ValidL &&
"Valid conjunction/disjunction tree");
3464 assert(ValidR &&
"Valid conjunction/disjunction tree");
3469 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
3478 bool NegateAfterAll;
3482 assert(CanNegateR &&
"at least one side must be negatable");
3483 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
3487 NegateAfterR =
true;
3490 NegateR = CanNegateR;
3491 NegateAfterR = !CanNegateR;
3494 NegateAfterAll = !Negate;
3496 assert(Opcode ==
ISD::AND &&
"Valid conjunction/disjunction tree");
3497 assert(!Negate &&
"Valid conjunction/disjunction tree");
3501 NegateAfterR =
false;
3502 NegateAfterAll =
false;
3522 bool DummyCanNegate;
3523 bool DummyMustBeFirst;
3535 auto isSupportedExtend = [&](
SDValue V) {
3540 if (
ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
3541 uint64_t Mask = MaskCst->getZExtValue();
3542 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
3548 if (!
Op.hasOneUse())
3551 if (isSupportedExtend(
Op))
3554 unsigned Opc =
Op.getOpcode();
3556 if (
ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(
Op.getOperand(1))) {
3557 uint64_t Shift = ShiftCst->getZExtValue();
3558 if (isSupportedExtend(
Op.getOperand(0)))
3559 return (Shift <= 4) ? 2 : 1;
3560 EVT VT =
Op.getValueType();
3561 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
3572 EVT VT =
RHS.getValueType();
3581 if ((VT == MVT::i32 &&
C != 0x80000000 &&
3583 (VT == MVT::i64 &&
C != 0x80000000ULL &&
3592 if ((VT == MVT::i32 &&
C != 0 &&
3602 if ((VT == MVT::i32 &&
C != INT32_MAX &&
3613 if ((VT == MVT::i32 &&
C != UINT32_MAX &&
3668 cast<LoadSDNode>(
LHS)->getMemoryVT() == MVT::i16 &&
3669 LHS.getNode()->hasNUsesOfValue(1, 0)) {
3670 int16_t ValueofRHS =
RHS->getAsZExtVal();
3676 RHS.getValueType()),
3698static std::pair<SDValue, SDValue>
3700 assert((
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::i64) &&
3701 "Unsupported value type");
3707 switch (
Op.getOpcode()) {
3731 if (
Op.getValueType() == MVT::i32) {
3754 assert(
Op.getValueType() == MVT::i64 &&
"Expected an i64 value type");
3783 Overflow =
Value.getValue(1);
3785 return std::make_pair(
Value, Overflow);
3791 return LowerToScalableOp(
Op, DAG);
3838 if (
LHS.getValueType() != MVT::i32 &&
LHS.getValueType() != MVT::i64)
3845 if (!CFVal || !CTVal)
3882 return Cmp.getValue(1);
3911 unsigned Opcode,
bool IsSigned) {
3912 EVT VT0 =
Op.getValue(0).getValueType();
3913 EVT VT1 =
Op.getValue(1).getValueType();
3915 if (VT0 != MVT::i32 && VT0 != MVT::i64)
3969 unsigned IsWrite =
Op.getConstantOperandVal(2);
3970 unsigned Locality =
Op.getConstantOperandVal(3);
3971 unsigned IsData =
Op.getConstantOperandVal(4);
3973 bool IsStream = !Locality;
3977 assert(Locality <= 3 &&
"Prefetch locality out-of-range");
3981 Locality = 3 - Locality;
3985 unsigned PrfOp = (IsWrite << 4) |
3996 EVT VT =
Op.getValueType();
4001 return LowerFixedLengthFPExtendToSVE(
Op, DAG);
4003 assert(
Op.getValueType() == MVT::f128 &&
"Unexpected lowering");
4009 if (
Op.getValueType().isScalableVector())
4012 bool IsStrict =
Op->isStrictFPOpcode();
4013 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4017 return LowerFixedLengthFPRoundToSVE(
Op, DAG);
4019 if (SrcVT != MVT::f128) {
4036 bool IsStrict =
Op->isStrictFPOpcode();
4037 EVT InVT =
Op.getOperand(IsStrict ? 1 : 0).getValueType();
4038 EVT VT =
Op.getValueType();
4044 return LowerToPredicatedOp(
Op, DAG, Opcode);
4049 return LowerFixedLengthFPToIntToSVE(
Op, DAG);
4055 !Subtarget->hasFullFP16()) {
4060 {
Op.getOperand(0),
Op.getOperand(1)});
4061 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
4062 {Ext.getValue(1), Ext.getValue(0)});
4065 Op.getOpcode(), dl,
Op.getValueType(),
4071 if (VTSize < InVTSize) {
4076 {Op.getOperand(0), Op.getOperand(1)});
4086 if (VTSize > InVTSize) {
4093 {
Op.getOperand(0),
Op.getOperand(1)});
4094 return DAG.
getNode(
Op.getOpcode(), dl, {VT, MVT::Other},
4095 {Ext.getValue(1), Ext.getValue(0)});
4098 return DAG.
getNode(
Op.getOpcode(), dl, VT, Ext);
4107 Op.getOperand(IsStrict ? 1 : 0), DAG.
getConstant(0, dl, MVT::i64));
4110 return DAG.
getNode(
Op.getOpcode(), dl, {ScalarVT, MVT::Other},
4111 {Op.getOperand(0), Extract});
4112 return DAG.
getNode(
Op.getOpcode(), dl, ScalarVT, Extract);
4121 bool IsStrict =
Op->isStrictFPOpcode();
4122 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4125 return LowerVectorFP_TO_INT(
Op, DAG);
4128 if (SrcVal.
getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4133 {
Op.getOperand(0), SrcVal});
4134 return DAG.
getNode(
Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
4135 {Ext.getValue(1), Ext.getValue(0)});
4138 Op.getOpcode(), dl,
Op.getValueType(),
4151AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(
SDValue Op,
4157 EVT DstVT =
Op.getValueType();
4158 EVT SatVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
4163 assert(SatWidth <= DstElementWidth &&
4164 "Saturation width cannot exceed result width");
4175 if (SrcElementVT == MVT::f16 &&
4176 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
4180 SrcElementVT = MVT::f32;
4181 SrcElementWidth = 32;
4182 }
else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
4183 SrcElementVT != MVT::f16)
4188 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
4189 return DAG.
getNode(
Op.getOpcode(),
DL, DstVT, SrcVal,
4196 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
4227 return LowerVectorFP_TO_INT_SAT(
Op, DAG);
4229 EVT DstVT =
Op.getValueType();
4230 EVT SatVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
4233 assert(SatWidth <= DstWidth &&
"Saturation width cannot exceed result width");
4236 if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
4239 }
else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
4244 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
4245 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
4246 DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
4247 return DAG.
getNode(
Op.getOpcode(),
DL, DstVT, SrcVal,
4253 if (DstWidth < SatWidth)
4280 bool IsStrict =
Op->isStrictFPOpcode();
4281 EVT VT =
Op.getValueType();
4284 EVT InVT =
In.getValueType();
4285 unsigned Opc =
Op.getOpcode();
4293 In = DAG.
getNode(CastOpc, dl, CastVT, In);
4294 return DAG.
getNode(Opc, dl, VT, In);
4299 return LowerToPredicatedOp(
Op, DAG, Opcode);
4304 return LowerFixedLengthIntToFPToSVE(
Op, DAG);
4308 if (VTSize < InVTSize) {
4313 In = DAG.
getNode(Opc, dl, {CastVT, MVT::Other},
4314 {
Op.getOperand(0), In});
4324 if (VTSize > InVTSize) {
4327 In = DAG.
getNode(CastOpc, dl, CastVT, In);
4329 return DAG.
getNode(Opc, dl, {VT, MVT::Other}, {
Op.getOperand(0), In});
4330 return DAG.
getNode(Opc, dl, VT, In);
4341 return DAG.
getNode(
Op.getOpcode(), dl, {ScalarVT, MVT::Other},
4342 {Op.getOperand(0), Extract});
4343 return DAG.
getNode(
Op.getOpcode(), dl, ScalarVT, Extract);
4351 if (
Op.getValueType().isVector())
4352 return LowerVectorINT_TO_FP(
Op, DAG);
4354 bool IsStrict =
Op->isStrictFPOpcode();
4355 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4358 if (
Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4362 {Op.getOperand(0), SrcVal});
4369 DAG.
getNode(
Op.getOpcode(), dl, MVT::f32, SrcVal),
4379 if (
Op.getValueType() != MVT::f128)
4398 Entry.IsSExt =
false;
4399 Entry.IsZExt =
false;
4400 Args.push_back(Entry);
4403 : RTLIB::SINCOS_STRET_F32;
4414 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
4415 return CallResult.first;
4422 EVT OpVT =
Op.getValueType();
4423 EVT ArgVT =
Op.getOperand(0).getValueType();
4426 return LowerFixedLengthBitcastToSVE(
Op, DAG);
4439 "Expected int->fp bitcast!");
4443 return getSVESafeBitCast(OpVT, ExtResult, DAG);
4445 return getSVESafeBitCast(OpVT,
Op.getOperand(0), DAG);
4448 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
4452 if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
4455 assert(ArgVT == MVT::i16);
4470 switch (OrigSimpleTy) {
4483 unsigned ExtOpcode) {
4499static std::optional<uint64_t>
4503 return std::nullopt;
4508 return std::nullopt;
4510 return C->getZExtValue();
4515 EVT VT =
N.getValueType();
4520 for (
const SDValue &Elt :
N->op_values()) {
4523 unsigned HalfSize = EltSize / 2;
4525 if (!
isIntN(HalfSize,
C->getSExtValue()))
4528 if (!
isUIntN(HalfSize,
C->getZExtValue()))
4540 EVT VT =
N.getValueType();
4545 unsigned EltSize = OrigEltSize / 2;
4554 N.getOperand(0).getValueType(), VT,
4560 for (
unsigned i = 0; i != NumElts; ++i) {
4561 const APInt &CInt =
N.getConstantOperandAPInt(i);
4582 unsigned Opcode =
N.getOpcode();
4593 unsigned Opcode =
N.getOpcode();
4614 {Chain, DAG.
getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
4674 if (IsN0SExt && IsN1SExt)
4680 if (IsN0ZExt && IsN1ZExt)
4684 if (((IsN0SExt && IsN1ZExt) || (IsN0ZExt && IsN1SExt)) &&
4704 if (IsN0ZExt || IsN1ZExt) {
4712 if (!IsN1SExt && !IsN1ZExt)
4734 EVT VT =
Op.getValueType();
4743 "unexpected type for custom-lowering ISD::MUL");
4757 if (VT == MVT::v1i64) {
4758 if (Subtarget->hasSVE())
4775 if (Subtarget->hasSVE())
4791 "unexpected types for extended operands to VMULL");
4814 if (VT == MVT::nxv1i1 &&
Pattern == AArch64SVEPredPattern::all)
4821 bool IsLess,
bool IsEqual) {
4822 if (!isa<ConstantSDNode>(
Op.getOperand(1)) ||
4823 !isa<ConstantSDNode>(
Op.getOperand(2)))
4827 APInt X =
Op.getConstantOperandAPInt(1);
4828 APInt Y =
Op.getConstantOperandAPInt(2);
4829 APInt NumActiveElems;
4832 NumActiveElems = IsSigned ?
Y.ssub_ov(
X, Overflow) :
Y.usub_ov(
X, Overflow);
4834 NumActiveElems = IsSigned ?
X.ssub_ov(
Y, Overflow) :
X.usub_ov(
Y, Overflow);
4841 NumActiveElems = IsSigned ? NumActiveElems.
sadd_ov(One, Overflow)
4842 : NumActiveElems.
uadd_ov(One, Overflow);
4847 std::optional<unsigned> PredPattern =
4849 unsigned MinSVEVectorSize = std::max(
4851 unsigned ElementSize = 128 /
Op.getValueType().getVectorMinNumElements();
4852 if (PredPattern != std::nullopt &&
4853 NumActiveElems.
getZExtValue() <= (MinSVEVectorSize / ElementSize))
4854 return getPTrue(DAG, dl,
Op.getValueType(), *PredPattern);
4863 EVT InVT =
Op.getValueType();
4867 "Expected a predicate-to-predicate bitcast");
4871 "Only expect to cast between legal scalable predicate types!");
4907 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
4909 RetTy, Callee, std::move(Args));
4910 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
4956 SDValue TileSlice =
N->getOperand(2);
4959 int32_t ConstAddend = 0;
4965 ConstAddend = cast<ConstantSDNode>(VecNum.
getOperand(1))->getSExtValue();
4967 }
else if (
auto ImmNode = dyn_cast<ConstantSDNode>(VecNum)) {
4968 ConstAddend = ImmNode->getSExtValue();
4972 int32_t ImmAddend = ConstAddend % 16;
4973 if (int32_t
C = (ConstAddend - ImmAddend)) {
4975 VarAddend = VarAddend
4996 {
N.getOperand(0), TileSlice,
Base,
5002 unsigned IntNo =
Op.getConstantOperandVal(1);
5007 case Intrinsic::aarch64_prefetch: {
5011 unsigned IsWrite =
Op.getConstantOperandVal(3);
5012 unsigned Locality =
Op.getConstantOperandVal(4);
5013 unsigned IsStream =
Op.getConstantOperandVal(5);
5014 unsigned IsData =
Op.getConstantOperandVal(6);
5015 unsigned PrfOp = (IsWrite << 4) |
5023 case Intrinsic::aarch64_sme_str:
5024 case Intrinsic::aarch64_sme_ldr: {
5027 case Intrinsic::aarch64_sme_za_enable:
5033 case Intrinsic::aarch64_sme_za_disable:
5044 unsigned IntNo =
Op.getConstantOperandVal(1);
5049 case Intrinsic::aarch64_mops_memset_tag: {
5050 auto Node = cast<MemIntrinsicSDNode>(
Op.getNode());
5056 auto Alignment =
Node->getMemOperand()->getAlign();
5057 bool IsVol =
Node->isVolatile();
5058 auto DstPtrInfo =
Node->getPointerInfo();
5077 unsigned IntNo =
Op.getConstantOperandVal(0);
5081 case Intrinsic::thread_pointer: {
5085 case Intrinsic::aarch64_neon_abs: {
5086 EVT Ty =
Op.getValueType();
5087 if (Ty == MVT::i64) {
5098 case Intrinsic::aarch64_neon_pmull64: {
5102 std::optional<uint64_t> LHSLane =
5104 std::optional<uint64_t> RHSLane =
5107 assert((!LHSLane || *LHSLane < 2) &&
"Expect lane to be None or 0 or 1");
5108 assert((!RHSLane || *RHSLane < 2) &&
"Expect lane to be None or 0 or 1");
5114 auto TryVectorizeOperand = [](
SDValue N, std::optional<uint64_t> NLane,
5115 std::optional<uint64_t> OtherLane,
5121 if (NLane && *NLane == 1)
5126 if (OtherLane && *OtherLane == 1) {
5133 if (NLane && *NLane == 0)
5146 assert(
N.getValueType() == MVT::i64 &&
5147 "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
5151 LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, dl, DAG);
5152 RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, dl, DAG);
5156 case Intrinsic::aarch64_neon_smax:
5158 Op.getOperand(1),
Op.getOperand(2));
5159 case Intrinsic::aarch64_neon_umax:
5161 Op.getOperand(1),
Op.getOperand(2));
5162 case Intrinsic::aarch64_neon_smin:
5164 Op.getOperand(1),
Op.getOperand(2));
5165 case Intrinsic::aarch64_neon_umin:
5167 Op.getOperand(1),
Op.getOperand(2));
5168 case Intrinsic::aarch64_neon_scalar_sqxtn:
5169 case Intrinsic::aarch64_neon_scalar_sqxtun:
5170 case Intrinsic::aarch64_neon_scalar_uqxtn: {
5171 assert(
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::f32);
5172 if (
Op.getValueType() == MVT::i32)
5177 Op.getOperand(1))));
5180 case Intrinsic::aarch64_sve_whilelo:
5183 case Intrinsic::aarch64_sve_whilelt:
5186 case Intrinsic::aarch64_sve_whilels:
5189 case Intrinsic::aarch64_sve_whilele:
5192 case Intrinsic::aarch64_sve_whilege:
5195 case Intrinsic::aarch64_sve_whilegt:
5198 case Intrinsic::aarch64_sve_whilehs:
5201 case Intrinsic::aarch64_sve_whilehi:
5204 case Intrinsic::aarch64_sve_sunpkhi:
5207 case Intrinsic::aarch64_sve_sunpklo:
5210 case Intrinsic::aarch64_sve_uunpkhi:
5213 case Intrinsic::aarch64_sve_uunpklo:
5216 case Intrinsic::aarch64_sve_clasta_n:
5218 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
5219 case Intrinsic::aarch64_sve_clastb_n:
5221 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
5222 case Intrinsic::aarch64_sve_lasta:
5224 Op.getOperand(1),
Op.getOperand(2));
5225 case Intrinsic::aarch64_sve_lastb:
5227 Op.getOperand(1),
Op.getOperand(2));
5228 case Intrinsic::aarch64_sve_rev:
5231 case Intrinsic::aarch64_sve_tbl:
5233 Op.getOperand(1),
Op.getOperand(2));
5234 case Intrinsic::aarch64_sve_trn1:
5236 Op.getOperand(1),
Op.getOperand(2));
5237 case Intrinsic::aarch64_sve_trn2:
5239 Op.getOperand(1),
Op.getOperand(2));
5240 case Intrinsic::aarch64_sve_uzp1:
5242 Op.getOperand(1),
Op.getOperand(2));
5243 case Intrinsic::aarch64_sve_uzp2:
5245 Op.getOperand(1),
Op.getOperand(2));
5246 case Intrinsic::aarch64_sve_zip1:
5248 Op.getOperand(1),
Op.getOperand(2));
5249 case Intrinsic::aarch64_sve_zip2:
5251 Op.getOperand(1),
Op.getOperand(2));
5252 case Intrinsic::aarch64_sve_splice:
5254 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
5255 case Intrinsic::aarch64_sve_ptrue:
5256 return getPTrue(DAG, dl,
Op.getValueType(),
Op.getConstantOperandVal(1));
5257 case Intrinsic::aarch64_sve_clz:
5259 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5260 case Intrinsic::aarch64_sme_cntsb:
5263 case Intrinsic::aarch64_sme_cntsh: {
5268 case Intrinsic::aarch64_sme_cntsw: {
5274 case Intrinsic::aarch64_sme_cntsd: {
5280 case Intrinsic::aarch64_sve_cnt: {
5283 if (
Data.getValueType().isFloatingPoint())
5286 Op.getOperand(2),
Data,
Op.getOperand(1));
5288 case Intrinsic::aarch64_sve_dupq_lane:
5289 return LowerDUPQLane(
Op, DAG);
5290 case Intrinsic::aarch64_sve_convert_from_svbool:
5291 if (
Op.getValueType() == MVT::aarch64svcount)
5294 case Intrinsic::aarch64_sve_convert_to_svbool:
5295 if (
Op.getOperand(1).getValueType() == MVT::aarch64svcount)
5298 case Intrinsic::aarch64_sve_fneg:
5300 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5301 case Intrinsic::aarch64_sve_frintp:
5303 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5304 case Intrinsic::aarch64_sve_frintm:
5306 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5307 case Intrinsic::aarch64_sve_frinti:
5309 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5310 case Intrinsic::aarch64_sve_frintx:
5312 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5313 case Intrinsic::aarch64_sve_frinta:
5315 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5316 case Intrinsic::aarch64_sve_frintn:
5318 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5319 case Intrinsic::aarch64_sve_frintz:
5321 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5322 case Intrinsic::aarch64_sve_ucvtf:
5324 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5326 case Intrinsic::aarch64_sve_scvtf:
5328 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5330 case Intrinsic::aarch64_sve_fcvtzu:
5332 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5334 case Intrinsic::aarch64_sve_fcvtzs:
5336 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5338 case Intrinsic::aarch64_sve_fsqrt:
5340 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5341 case Intrinsic::aarch64_sve_frecpx:
5343 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5344 case Intrinsic::aarch64_sve_frecpe_x:
5347 case Intrinsic::aarch64_sve_frecps_x:
5349 Op.getOperand(1),
Op.getOperand(2));
5350 case Intrinsic::aarch64_sve_frsqrte_x:
5353 case Intrinsic::aarch64_sve_frsqrts_x:
5355 Op.getOperand(1),
Op.getOperand(2));
5356 case Intrinsic::aarch64_sve_fabs:
5358 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5359 case Intrinsic::aarch64_sve_abs:
5361 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5362 case Intrinsic::aarch64_sve_neg:
5364 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5365 case Intrinsic::aarch64_sve_insr: {
5368 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
5372 Op.getOperand(1), Scalar);
5374 case Intrinsic::aarch64_sve_rbit:
5376 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
5378 case Intrinsic::aarch64_sve_revb:
5380 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5381 case Intrinsic::aarch64_sve_revh:
5383 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5384 case Intrinsic::aarch64_sve_revw:
5386 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5387 case Intrinsic::aarch64_sve_revd:
5389 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
5390 case Intrinsic::aarch64_sve_sxtb:
5393 Op.getOperand(2),
Op.getOperand(3),
5394 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i8)),
5396 case Intrinsic::aarch64_sve_sxth:
5399 Op.getOperand(2),
Op.getOperand(3),
5400 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i16)),
5402 case Intrinsic::aarch64_sve_sxtw:
5405 Op.getOperand(2),
Op.getOperand(3),
5406 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i32)),
5408 case Intrinsic::aarch64_sve_uxtb:
5411 Op.getOperand(2),
Op.getOperand(3),
5412 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i8)),
5414 case Intrinsic::aarch64_sve_uxth:
5417 Op.getOperand(2),
Op.getOperand(3),
5418 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i16)),
5420 case Intrinsic::aarch64_sve_uxtw:
5423 Op.getOperand(2),
Op.getOperand(3),
5424 DAG.
getValueType(
Op.getValueType().changeVectorElementType(MVT::i32)),
5426 case Intrinsic::localaddress: {
5429 unsigned Reg =
RegInfo->getLocalAddressRegister(MF);
5431 Op.getSimpleValueType());
5434 case Intrinsic::eh_recoverfp: {
5439 SDValue IncomingFPOp =
Op.getOperand(2);
5441 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->
getGlobal() :
nullptr);
5444 "llvm.eh.recoverfp must take a function as the first argument");
5445 return IncomingFPOp;
5448 case Intrinsic::aarch64_neon_vsri:
5449 case Intrinsic::aarch64_neon_vsli:
5450 case Intrinsic::aarch64_sve_sri:
5451 case Intrinsic::aarch64_sve_sli: {
5452 EVT Ty =
Op.getValueType();
5459 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
5460 IntNo == Intrinsic::aarch64_sve_sri;
5462 return DAG.
getNode(Opcode, dl, Ty,
Op.getOperand(1),
Op.getOperand(2),
5466 case Intrinsic::aarch64_neon_srhadd:
5467 case Intrinsic::aarch64_neon_urhadd:
5468 case Intrinsic::aarch64_neon_shadd:
5469 case Intrinsic::aarch64_neon_uhadd: {
5470 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
5471 IntNo == Intrinsic::aarch64_neon_shadd);
5472 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
5473 IntNo == Intrinsic::aarch64_neon_urhadd);
5474 unsigned Opcode = IsSignedAdd
5477 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
5480 case Intrinsic::aarch64_neon_saddlp:
5481 case Intrinsic::aarch64_neon_uaddlp: {
5482 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
5485 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1));
5487 case Intrinsic::aarch64_neon_sdot:
5488 case Intrinsic::aarch64_neon_udot:
5489 case Intrinsic::aarch64_sve_sdot:
5490 case Intrinsic::aarch64_sve_udot: {
5491 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
5492 IntNo == Intrinsic::aarch64_sve_udot)
5495 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
5496 Op.getOperand(2),
Op.getOperand(3));
5498 case Intrinsic::get_active_lane_mask: {
5502 Op.getOperand(1),
Op.getOperand(2));
5504 case Intrinsic::aarch64_neon_uaddlv: {
5505 EVT OpVT =
Op.getOperand(1).getValueType();
5506 EVT ResVT =
Op.getValueType();
5507 if (ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
5508 OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) {
5515 return EXTRACT_VEC_ELT;
5519 case Intrinsic::experimental_cttz_elts: {
5528bool AArch64TargetLowering::shouldExtendGSIndex(
EVT VT,
EVT &EltTy)
const {
5537bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(
SDValue Extend,
5553bool AArch64TargetLowering::isVectorLoadExtDesirable(
SDValue ExtVal)
const {
5563 if (
auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->
getOperand(0))) {
5570 unsigned NumExtMaskedLoads = 0;
5571 for (
auto *U : Ld->getMask()->uses())
5572 if (isa<MaskedLoadSDNode>(U))
5573 NumExtMaskedLoads++;
5575 if (NumExtMaskedLoads <= 1)
5584 std::map<std::tuple<bool, bool, bool>,
unsigned> AddrModes = {
5585 {std::make_tuple(
false,
false,
false),
5587 {std::make_tuple(
false,
false,
true),
5589 {std::make_tuple(
false,
true,
false),
5591 {std::make_tuple(
false,
true,
true),
5593 {std::make_tuple(
true,
false,
false),
5595 {std::make_tuple(
true,
false,
true),
5597 {std::make_tuple(
true,
true,
false),
5599 {std::make_tuple(
true,
true,
true),
5602 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
5603 return AddrModes.find(Key)->second;
5639 EVT VT =
Op.getValueType();
5663 EVT IndexVT =
Index.getValueType();
5676 "Cannot lower when not using SVE for fixed vectors!");
5686 Mask.getValueType().getVectorElementType() == MVT::i64)
5751 EVT IndexVT =
Index.getValueType();
5764 "Cannot lower when not using SVE for fixed vectors!");
5777 Mask.getValueType().getVectorElementType() == MVT::i64)
5787 if (PromotedVT != VT)
5811 assert(LoadNode &&
"Expected custom lowering of a masked load node");
5812 EVT VT =
Op->getValueType(0);
5815 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
5839 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
5852 {Undef, Undef, Undef, Undef});
5862 return DAG.
getStore(ST->getChain(),
DL, ExtractTrunc,
5863 ST->getBasePtr(), ST->getMemOperand());
5873 assert (StoreNode &&
"Can only custom lower store nodes");
5884 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
5896 MemVT == MVT::v4i8) {
5920 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
5924 }
else if (MemVT == MVT::i128 && StoreNode->
isVolatile()) {
5925 return LowerStore128(
Op, DAG);
5926 }
else if (MemVT == MVT::i64x8) {
5931 EVT PtrVT =
Base.getValueType();
5932 for (
unsigned i = 0; i < 8; i++) {
5953 bool IsStoreRelease =
5956 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
5957 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
5969 std::swap(StoreValue.first, StoreValue.second);
5972 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
5973 StoreNode->getBasePtr()},
5982 assert(LoadNode &&
"Expected custom lowering of a load node");
5988 EVT PtrVT =
Base.getValueType();
5989 for (
unsigned i = 0; i < 8; i++) {
6003 EVT VT =
Op->getValueType(0);
6004 assert((VT == MVT::v4i16 || VT == MVT::v4i32) &&
"Expected v4i16 or v4i32");
6026 if (VT == MVT::v4i32)
6033 MVT VT =
Op.getSimpleValueType();
6072 if (
auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
6074 MVT VT =
Op.getSimpleValueType();
6077 unsigned int NewShiftNo =
6092 EVT XScalarTy =
X.getValueType();
6097 switch (
Op.getSimpleValueType().SimpleTy) {
6105 ExpVT = MVT::nxv4i32;
6109 ExpVT = MVT::nxv2i64;
6120 AArch64SVEPredPattern::all);
6123 DAG.
getConstant(Intrinsic::aarch64_sve_fscale,
DL, MVT::i64),
6127 if (
X.getValueType() != XScalarTy)
6138 switch (
Op.getOpcode()) {
6143 return LowerBITCAST(
Op, DAG);
6145 return LowerGlobalAddress(
Op, DAG);
6147 return LowerGlobalTLSAddress(
Op, DAG);
6151 return LowerSETCC(
Op, DAG);
6153 return LowerSETCCCARRY(
Op, DAG);
6157 return LowerBR_CC(
Op, DAG);
6159 return LowerSELECT(
Op, DAG);
6161 return LowerSELECT_CC(
Op, DAG);
6163 return LowerJumpTable(
Op, DAG);
6165 return LowerBR_JT(
Op, DAG);
6167 return LowerConstantPool(
Op, DAG);
6169 return LowerBlockAddress(
Op, DAG);
6171 return LowerVASTART(
Op, DAG);
6173 return LowerVACOPY(
Op, DAG);
6175 return LowerVAARG(
Op, DAG);
6223 return LowerFP_ROUND(
Op, DAG);
6225 return LowerFP_EXTEND(
Op, DAG);
6227 return LowerFRAMEADDR(
Op, DAG);
6229 return LowerSPONENTRY(
Op, DAG);
6231 return LowerRETURNADDR(
Op, DAG);
6233 return LowerADDROFRETURNADDR(
Op, DAG);
6235 return LowerCONCAT_VECTORS(
Op, DAG);
6237 return LowerINSERT_VECTOR_ELT(
Op, DAG);
6239 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
6241 return LowerBUILD_VECTOR(
Op, DAG);
6243 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
6245 return LowerVECTOR_SHUFFLE(
Op, DAG);
6247 return LowerSPLAT_VECTOR(
Op, DAG);
6249 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
6251 return LowerINSERT_SUBVECTOR(
Op, DAG);
6254 return LowerDIV(
Op, DAG);
6259 return LowerMinMax(
Op, DAG);
6263 return LowerVectorSRA_SRL_SHL(
Op, DAG);
6267 return LowerShiftParts(
Op, DAG);
6270 return LowerCTPOP_PARITY(
Op, DAG);
6272 return LowerFCOPYSIGN(
Op, DAG);
6274 return LowerVectorOR(
Op, DAG);
6276 return LowerXOR(
Op, DAG);
6283 return LowerINT_TO_FP(
Op, DAG);
6288 return LowerFP_TO_INT(
Op, DAG);
6291 return LowerFP_TO_INT_SAT(
Op, DAG);
6293 return LowerFSINCOS(
Op, DAG);
6295 return LowerGET_ROUNDING(
Op, DAG);
6297 return LowerSET_ROUNDING(
Op, DAG);
6299 return LowerMUL(
Op, DAG);
6305 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
6307 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
6309 return LowerINTRINSIC_VOID(
Op, DAG);
6311 if (cast<MemSDNode>(
Op)->getMemoryVT() == MVT::i128) {
6312 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
6313 return LowerStore128(
Op, DAG);
6317 return LowerSTORE(
Op, DAG);
6319 return LowerFixedLengthVectorMStoreToSVE(
Op, DAG);
6321 return LowerMGATHER(
Op, DAG);
6323 return LowerMSCATTER(
Op, DAG);
6325 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
6339 return LowerVECREDUCE(
Op, DAG);
6341 return LowerATOMIC_LOAD_AND(
Op, DAG);
6343 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
6345 return LowerVSCALE(
Op, DAG);
6349 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
6352 EVT ExtraVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
6354 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
6355 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
6358 return LowerToPredicatedOp(
Op, DAG,
6362 return LowerTRUNCATE(
Op, DAG);
6364 return LowerMLOAD(
Op, DAG);
6368 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
6369 return LowerLOAD(
Op, DAG);
6373 return LowerToScalableOp(
Op, DAG);
6383 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
6385 return LowerABS(
Op, DAG);
6399 return LowerBitreverse(
Op, DAG);
6405 return LowerCTTZ(
Op, DAG);
6407 return LowerVECTOR_SPLICE(
Op, DAG);
6409 return LowerVECTOR_DEINTERLEAVE(
Op, DAG);
6411 return LowerVECTOR_INTERLEAVE(
Op, DAG);
6416 assert(
Op.getOperand(0).getValueType() == MVT::f16 &&
6417 "Expected custom lowering of rounding operations only for f16");
6420 return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), Ext);
6426 assert(
Op.getOperand(1).getValueType() == MVT::f16 &&
6427 "Expected custom lowering of rounding operations only for f16");
6430 {
Op.getOperand(0),
Op.getOperand(1)});
6431 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
6432 {Ext.getValue(1), Ext.getValue(0)});
6435 assert(
Op.getOperand(2).getValueType() == MVT::i128 &&
6436 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
6441 std::pair<SDValue, SDValue> Pair =
6446 SysRegName, Pair.first, Pair.second);
6463 EVT VT,
bool OverrideNEON)
const {
6513 unsigned Opcode =
N->getOpcode();
6518 unsigned IID =
N->getConstantOperandVal(0);
6519 if (IID < Intrinsic::num_intrinsics)
6533 if (IID == Intrinsic::aarch64_neon_umull ||
6535 IID == Intrinsic::aarch64_neon_smull ||
6544 bool IsVarArg)
const {
6612AArch64TargetLowering::allocateLazySaveBuffer(
SDValue &Chain,
const SDLoc &
DL,
6638 EVT PtrTy =
Ptr.getValueType();
6651SDValue AArch64TargetLowering::LowerFormalArguments(
6680 unsigned NumArgs =
Ins.size();
6682 unsigned CurArgIdx = 0;
6683 for (
unsigned i = 0; i != NumArgs; ++i) {
6685 if (Ins[i].isOrigArg()) {
6686 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
6687 CurArgIdx =
Ins[i].getOrigArgIndex();
6694 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
6696 else if (ActualMVT == MVT::i16)
6699 bool UseVarArgCC =
false;
6701 UseVarArgCC = isVarArg;
6705 assert(!Res &&
"Call operand has unhandled type");
6710 bool IsLocallyStreaming =
6711 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
6716 unsigned ExtraArgLocs = 0;
6717 for (
unsigned i = 0, e =
Ins.size(); i != e; ++i) {
6720 if (Ins[i].
Flags.isByVal()) {
6724 int Size =
Ins[i].Flags.getByValSize();
6725 unsigned NumRegs = (
Size + 7) / 8;
6737 if (Ins[i].
Flags.isSwiftAsync())
6746 if (RegVT == MVT::i32)
6747 RC = &AArch64::GPR32RegClass;
6748 else if (RegVT == MVT::i64)
6749 RC = &AArch64::GPR64RegClass;
6750 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
6751 RC = &AArch64::FPR16RegClass;
6752 else if (RegVT == MVT::f32)
6753 RC = &AArch64::FPR32RegClass;
6755 RC = &AArch64::FPR64RegClass;
6757 RC = &AArch64::FPR128RegClass;
6761 RC = &AArch64::PPRRegClass;
6762 }
else if (RegVT == MVT::aarch64svcount) {
6764 RC = &AArch64::PPRRegClass;
6767 RC = &AArch64::ZPRRegClass;
6774 if (IsLocallyStreaming) {
6801 "Indirect arguments should be scalable on most subtargets");
6825 !
Ins[i].Flags.isInConsecutiveRegs())
6826 BEAlign = 8 - ArgSize;
6835 unsigned ObjOffset = ArgOffset + BEAlign;
6865 "Indirect arguments should be scalable on most subtargets");
6886 "Indirect arguments should be scalable on most subtargets");
6889 unsigned NumParts = 1;
6890 if (Ins[i].
Flags.isInConsecutiveRegs()) {
6892 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
6901 while (NumParts > 0) {
6909 DL,
Ptr.getValueType(),
6910 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize));
6913 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize),
DL,
6914 Ptr.getValueType());
6917 Flags.setNoUnsignedWrap(
true);
6919 BytesIncrement, Flags);
6931 if (Ins[i].isOrigArg()) {
6932 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
6934 if (!Ins[i].
Flags.isZExt()) {
6948 if (IsLocallyStreaming) {
6950 if (
Attrs.hasStreamingCompatibleInterface()) {
6951 PStateSM = getRuntimePStateSM(DAG, Chain,
DL, MVT::i64);
6964 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
6981 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
6985 unsigned VarArgsOffset = CCInfo.getStackSize();
6999 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
7003 if (!CCInfo.isAllocated(AArch64::X8)) {
7014 for (
unsigned I = 0,
E =
Ins.size();
I !=
E; ++
I) {
7016 Ins[
I].Flags.isInReg()) &&
7017 Ins[
I].Flags.isSRet()) {
7032 unsigned StackArgSize = CCInfo.getStackSize();
7034 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
7038 StackArgSize =
alignTo(StackArgSize, 16);
7057 unsigned TPIDR2Obj = allocateLazySaveBuffer(Chain,
DL, DAG);
7064void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
7085 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
7087 if (GPRSaveSize != 0) {
7090 if (GPRSaveSize & 15)
7110 for (
unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
7116 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
7126 if (Subtarget->hasFPARMv8() && !IsWin64) {
7128 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
7131 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
7133 if (FPRSaveSize != 0) {
7138 for (
unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
7153 if (!MemOps.
empty()) {
7165SDValue AArch64TargetLowering::LowerCallResult(
7169 SDValue ThisVal,
bool RequiresSMChange)
const {
7172 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
7177 if (i == 0 && isThisReturn) {
7179 "unexpected return calling convention register assignment");
7262 unsigned NumArgs = Outs.
size();
7263 for (
unsigned i = 0; i != NumArgs; ++i) {
7264 MVT ArgVT = Outs[i].VT;
7267 bool UseVarArgCC =
false;
7271 if (IsCalleeWin64) {
7274 UseVarArgCC = !Outs[i].IsFixed;
7285 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
7287 else if (ActualMVT == MVT::i16)
7293 assert(!Res &&
"Call operand has unhandled type");
7298bool AArch64TargetLowering::isEligibleForTailCallOptimization(
7299 const CallLoweringInfo &CLI)
const {
7305 bool IsVarArg = CLI.IsVarArg;
7318 if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
7319 CallerAttrs.requiresLazySave(CalleeAttrs) ||
7320 CallerAttrs.hasStreamingBody())
7331 bool CCMatch = CallerCC == CalleeCC;
7346 if (i->hasByValAttr())
7355 if (i->hasInRegAttr())
7373 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
7384 "Unexpected variadic calling convention");
7394 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
7396 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
7398 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
7399 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
7401 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7410 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
7414 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
7423 if (!ArgLoc.isRegLoc())
7435 A.getValVT().isScalableVector() ||
7437 "Expected value to be scalable");
7457 int ClobberedFI)
const {
7460 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
7471 if (FI->getIndex() < 0) {
7473 int64_t InLastByte = InFirstByte;
7476 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
7477 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
7485bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
7486 bool TailCallOpt)
const {
7497 APInt RequredZero(SizeInBits, 0xFE);
7499 bool ZExtBool = (Bits.Zero & RequredZero) == RequredZero;
7503void AArch64TargetLowering::AdjustInstrPostInstrSelection(
MachineInstr &
MI,
7509 if (
MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
7510 MI.getOpcode() == AArch64::MSRpstatePseudo)
7511 for (
unsigned I =
MI.getNumOperands() - 1;
I > 0; --
I)
7513 MO.isReg() && MO.isImplicit() && MO.isDef() &&
7514 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
7515 AArch64::GPR64RegClass.contains(MO.getReg())))
7516 MI.removeOperand(
I);
7545AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
7554 bool &IsTailCall = CLI.IsTailCall;
7556 bool IsVarArg = CLI.IsVarArg;
7560 bool IsThisReturn =
false;
7564 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
7565 bool IsSibCall =
false;
7566 bool GuardWithBTI =
false;
7568 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
7569 !Subtarget->noBTIAtReturnTwice()) {
7578 unsigned NumArgs = Outs.
size();
7580 for (
unsigned i = 0; i != NumArgs; ++i) {
7583 "currently not supported");
7594 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
7600 if (!Loc.isRegLoc())
7602 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
7603 AArch64::PPRRegClass.
contains(Loc.getLocReg());
7605 if (
any_of(RVLocs, HasSVERegLoc) ||
any_of(ArgLocs, HasSVERegLoc))
7611 IsTailCall = isEligibleForTailCallOptimization(CLI);
7623 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
7625 "site marked musttail");
7643 if (IsTailCall && !IsSibCall) {
7648 NumBytes =
alignTo(NumBytes, 16);
7653 FPDiff = NumReusableBytes - NumBytes;
7657 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
7665 assert(FPDiff % 16 == 0 &&
"unaligned stack on tail call");
7672 else if (
auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
7673 CalleeAttrs =
SMEAttrs(ES->getSymbol());
7675 auto DescribeCallsite =
7678 if (
auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
7679 R <<
ore::NV(
"Callee", ES->getSymbol());
7680 else if (CLI.CB && CLI.CB->getCalledFunction())
7681 R <<
ore::NV(
"Callee", CLI.CB->getCalledFunction()->getName());
7683 R <<
"unknown callee";
7688 bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
7689 if (RequiresLazySave) {
7699 Chain = DAG.
getTruncStore(Chain,
DL, NumZaSaveSlices, NumZaSaveSlicesAddr,
7703 DAG.
getConstant(Intrinsic::aarch64_sme_set_tpidr2,
DL, MVT::i32),
7711 return DescribeCallsite(R) <<
" sets up a lazy save for ZA";
7716 bool RequiresSMChange = CallerAttrs.requiresSMChange(CalleeAttrs);
7717 if (RequiresSMChange) {
7718 if (CallerAttrs.hasStreamingInterfaceOrBody())
7720 else if (CallerAttrs.hasNonStreamingInterface())
7723 PStateSM = getRuntimePStateSM(DAG, Chain,
DL, MVT::i64);
7730 DescribeCallsite(R) <<
" requires a streaming mode transition";
7737 bool ShouldPreserveZT0 = CallerAttrs.requiresPreservingZT0(CalleeAttrs);
7741 if (ShouldPreserveZT0) {
7753 bool DisableZA = CallerAttrs.requiresDisablingZABeforeCall(CalleeAttrs);
7754 assert((!DisableZA || !RequiresLazySave) &&
7755 "Lazy-save should have PSTATE.SM=1 on entry to the function");
7776 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
7778 for (
const auto &
F : Forwards) {
7785 unsigned ExtraArgLocs = 0;
7786 for (
unsigned i = 0, e = Outs.
size(); i != e; ++i) {
7804 if (Outs[i].ArgVT == MVT::i1) {
7826 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
7843 "Indirect arguments should be scalable on most subtargets");
7847 unsigned NumParts = 1;
7848 if (Outs[i].
Flags.isInConsecutiveRegs()) {
7850 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
7852 StoreSize *= NumParts;
7878 DL,
Ptr.getValueType(),
7879 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize));
7882 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize),
DL,
7883 Ptr.getValueType());
7886 Flags.setNoUnsignedWrap(
true);
7890 BytesIncrement, Flags);
7901 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
7902 Outs[0].VT == MVT::i64) {
7904 "unexpected calling convention register assignment");
7905 assert(!
Ins.empty() && Ins[0].VT == MVT::i64 &&
7906 "unexpected use of 'returned'");
7907 IsThisReturn =
true;
7916 [=](
const std::pair<unsigned, SDValue> &Elt) {
7954 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
7956 OpSize = (OpSize + 7) / 8;
7958 !
Flags.isInConsecutiveRegs()) {
7960 BEAlign = 8 - OpSize;
7963 int32_t
Offset = LocMemOffset + BEAlign;
7977 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
7985 if (Outs[i].
Flags.isByVal()) {
7989 Chain,
DL, DstAddr, Arg, SizeNode,
7990 Outs[i].
Flags.getNonZeroByValAlign(),
8019 if (!MemOpChains.
empty())
8023 if (RequiresSMChange) {
8026 InGlue, PStateSM,
true);
8033 for (
auto &RegToPass : RegsToPass) {
8035 RegToPass.second, InGlue);
8042 if (
auto *
G = dyn_cast<GlobalAddressSDNode>(Callee)) {
8043 auto GV =
G->getGlobal();
8053 }
else if (
auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8056 const char *
Sym = S->getSymbol();
8060 const char *
Sym = S->getSymbol();
8069 if (IsTailCall && !IsSibCall) {
8074 std::vector<SDValue> Ops;
8075 Ops.push_back(Chain);
8076 Ops.push_back(Callee);
8087 for (
auto &RegToPass : RegsToPass)
8089 RegToPass.second.getValueType()));
8096 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
8098 IsThisReturn =
false;
8099 Mask =
TRI->getCallPreservedMask(MF, CallConv);
8102 Mask =
TRI->getCallPreservedMask(MF, CallConv);
8105 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
8107 if (
TRI->isAnyArgRegReserved(MF))
8108 TRI->emitReservedArgRegCallError(MF);
8110 assert(Mask &&
"Missing call preserved mask for calling convention");
8114 Ops.push_back(InGlue);
8125 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
8138 "tail calls cannot be marked with clang.arc.attachedcall");
8145 Ops.insert(Ops.begin() + 1, GA);
8148 }
else if (GuardWithBTI) {
8153 Chain = DAG.
getNode(CallOpc,
DL, NodeTys, Ops);
8163 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
8171 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
8172 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
8177 if (RequiresSMChange) {
8178 assert(PStateSM &&
"Expected a PStateSM to be set");
8180 Result, InGlue, PStateSM,
false);
8183 if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
8190 if (ShouldPreserveZT0)
8193 {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
8195 if (RequiresLazySave) {
8199 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
8204 DAG.
getConstant(Intrinsic::aarch64_sme_get_tpidr2,
DL, MVT::i32));
8215 RestoreRoutine, RegMask,
Result.getValue(1)});
8220 DAG.
getConstant(Intrinsic::aarch64_sme_set_tpidr2,
DL, MVT::i32),
8224 if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0) {
8225 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
8242bool AArch64TargetLowering::CanLowerReturn(
8247 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
8269 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
8270 ++i, ++realRVLocIdx) {
8273 SDValue Arg = OutVals[realRVLocIdx];
8279 if (Outs[i].ArgVT == MVT::i1) {
8295 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
8304 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
8318 if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
8319 if (FuncAttrs.hasStreamingCompatibleInterface()) {
8321 assert(
Reg.isValid() &&
"PStateSM Register is invalid");
8328 DAG,
DL,
false, Chain,
8334 for (
auto &RetVal : RetVals) {
8335 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
8338 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
8349 unsigned RetValReg = AArch64::X0;
8351 RetValReg = AArch64::X8;
8362 if (AArch64::GPR64RegClass.
contains(*
I))
8364 else if (AArch64::FPR64RegClass.
contains(*
I))
8375 RetOps.push_back(Glue);
8384 getAddr(cast<ExternalSymbolSDNode>(Arm64ECRetDest), DAG, 0);
8387 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
8401 unsigned Flag)
const {
8403 N->getOffset(), Flag);
8408 unsigned Flag)
const {
8414 unsigned Flag)
const {
8416 N->getOffset(), Flag);
8421 unsigned Flag)
const {
8427 unsigned Flag)
const {
8432template <
class NodeTy>
8434 unsigned Flags)
const {
8445template <
class NodeTy>
8447 unsigned Flags)
const {
8461template <
class NodeTy>
8463 unsigned Flags)
const {
8475template <
class NodeTy>
8477 unsigned Flags)
const {
8493 "unexpected offset in global node");
8498 return getGOT(GN, DAG, OpFlags);
8504 Result = getAddrLarge(GN, DAG, OpFlags);
8506 Result = getAddrTiny(GN, DAG, OpFlags);
8508 Result = getAddr(GN, DAG, OpFlags);
8547AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
8550 "This function expects a Darwin target");
8555 const GlobalValue *GV = cast<GlobalAddressSDNode>(
Op)->getGlobal();
8565 PtrMemVT,
DL, Chain, DescAddr,
8591 Chain, FuncTLVGet, DAG.
getRegister(AArch64::X0, MVT::i64),
8705SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
8721AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
8737 "in local exec TLS model");
8753 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
8776 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
8783 GV,
DL, MVT::i64, 0,
8800 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
8808AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
8847 Chain =
TLS.getValue(1);
8873 return LowerDarwinGlobalTLSAddress(
Op, DAG);
8875 return LowerELFGlobalTLSAddress(
Op, DAG);
8877 return LowerWindowsGlobalTLSAddress(
Op, DAG);
8888 cast<VTSDNode>(Val.
getOperand(1))->getVT().getFixedSizeInBits() -
8910 bool ProduceNonFlagSettingCondBr =
8916 if (
LHS.getValueType() == MVT::f128) {
8921 if (!
RHS.getNode()) {
8941 OFCC = getInvertedCondCode(OFCC);
8948 if (
LHS.getValueType().isInteger()) {
8950 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
8955 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
8962 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
8978 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
8999 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
9015 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
9016 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
9037 if (!Subtarget->hasNEON())
9040 EVT VT =
Op.getValueType();
9068 return getSVESafeBitCast(VT,
Op, DAG);
9075 auto SetVecVal = [&](
int Idx = -1) {
9082 VecVal1 = BitCast(VecVT, In1, DAG);
9083 VecVal2 = BitCast(VecVT, In2, DAG);
9089 }
else if (VT == MVT::f64) {
9091 SetVecVal(AArch64::dsub);
9092 }
else if (VT == MVT::f32) {
9094 SetVecVal(AArch64::ssub);
9095 }
else if (VT == MVT::f16) {
9097 SetVecVal(AArch64::hsub);
9108 if (VT == MVT::f64 || VT == MVT::v2f64) {
9124 return BitCast(VT, BSP, DAG);
9130 Attribute::NoImplicitFloat))
9133 if (!Subtarget->hasNEON())
9139 EVT VT =
Op.getValueType();
9143 if (VT == MVT::i32 && IsParity)
9154 if (VT == MVT::i32 || VT == MVT::i64) {
9171 }
else if (VT == MVT::i128) {
9186 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
9192 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
9193 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
9194 "Unexpected type for custom ctpop lowering");
9201 unsigned EltSize = 8;
9209 DAG.
getConstant(Intrinsic::aarch64_neon_uaddlp,
DL, MVT::i32), Val);
9216 EVT VT =
Op.getValueType();
9229 EVT VT =
Op.getValueType();
9231 unsigned Opcode =
Op.getOpcode();
9275 EVT VT =
Op.getValueType();
9326 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
9332 N =
N->getOperand(0);
9336 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
9342 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
9356 EVT VT =
N->getValueType(0);
9366 unsigned NumXors = 0;
9371 std::tie(XOR0, XOR1) = WorkList[0];
9374 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
9375 std::tie(XOR0, XOR1) = WorkList[
I];
9377 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
9389 if (
Op.getValueType().isVector())
9390 return LowerVSETCC(
Op, DAG);
9392 bool IsStrict =
Op->isStrictFPOpcode();
9394 unsigned OpNo = IsStrict ? 1 : 0;
9397 Chain =
Op.getOperand(0);
9404 EVT VT =
Op.getValueType();
9410 if (
LHS.getValueType() == MVT::f128) {
9415 if (!
RHS.getNode()) {
9417 "Unexpected setcc expansion!");
9422 if (
LHS.getValueType().isInteger()) {
9435 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
9436 LHS.getValueType() == MVT::f64);
9480 EVT VT =
LHS.getValueType();
9481 if (VT != MVT::i32 && VT != MVT::i64)
9489 LHS, RHS, InvCarry);
9491 EVT OpVT =
Op.getValueType();
9511 if (
LHS.getValueType() == MVT::f128) {
9516 if (!
RHS.getNode()) {
9523 if (
LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
9529 if (
LHS.getValueType().isInteger()) {
9531 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
9542 EVT VT =
LHS.getValueType();
9555 LHS.getValueType() ==
RHS.getValueType()) {
9556 EVT VT =
LHS.getValueType();
9562 Shift = DAG.
getNOT(dl, Shift, VT);
9575 }
else if (CTVal && CFVal && CTVal->
isOne() && CFVal->
isZero()) {
9595 }
else if (CTVal && CFVal) {
9603 if (TrueVal == ~FalseVal) {
9605 }
else if (FalseVal > std::numeric_limits<int64_t>::min() &&
9606 TrueVal == -FalseVal) {
9617 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
9620 if (TrueVal32 > FalseVal32) {
9629 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
9632 if (TrueVal > FalseVal) {
9665 else if (CFVal && CFVal == RHSVal && AArch64CC ==
AArch64CC::NE)
9668 assert (CTVal && CFVal &&
"Expected constant operands for CSNEG.");
9682 return DAG.
getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
9686 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
9687 LHS.getValueType() == MVT::f64);
9701 if (RHSVal && RHSVal->
isZero()) {
9709 CFVal && CFVal->
isZero() &&
9732 EVT Ty =
Op.getValueType();
9733 auto Idx =
Op.getConstantOperandAPInt(2);
9734 int64_t IdxVal =
Idx.getSExtValue();
9736 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
9745 std::optional<unsigned> PredPattern;
9778 return LowerSELECT_CC(
CC, LHS, RHS, TVal, FVal,
DL, DAG);
9788 EVT Ty =
Op.getValueType();
9789 if (Ty == MVT::aarch64svcount) {
9845 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
9852 SDValue Res = LowerSELECT_CC(
CC, LHS, RHS, TVal, FVal,
DL, DAG);
9854 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
9870 return getAddrLarge(JT, DAG);
9872 return getAddrTiny(JT, DAG);
9873 return getAddr(JT, DAG);
9883 int JTI = cast<JumpTableSDNode>(
JT.getNode())->getIndex();
9902 return getGOT(CP, DAG);
9905 return getAddrLarge(CP, DAG);
9907 return getAddrTiny(CP, DAG);
9909 return getAddr(CP, DAG);
9918 return getAddrLarge(BA, DAG);
9920 return getAddrTiny(BA, DAG);
9922 return getAddr(BA, DAG);
9934 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
9965 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
9983 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
10054 return LowerWin64_VASTART(
Op, DAG);
10056 return LowerDarwin_VASTART(
Op, DAG);
10058 return LowerAAPCS_VASTART(
Op, DAG);
10067 unsigned VaListSize =
10071 const Value *DestSV = cast<SrcValueSDNode>(
Op.getOperand(3))->getValue();
10072 const Value *SrcSV = cast<SrcValueSDNode>(
Op.getOperand(4))->getValue();
10076 Align(PtrSize),
false,
false,
false,
10082 "automatic va_arg instruction only works on Darwin");
10084 const Value *
V = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
10085 EVT VT =
Op.getValueType();
10100 "currently not supported");
10117 ArgSize = std::max(ArgSize, MinSlotSize);
10118 bool NeedFPTrunc =
false;
10121 NeedFPTrunc =
true;
10155 EVT VT =
Op.getValueType();
10157 unsigned Depth =
Op.getConstantOperandVal(0);
10181#define GET_REGISTER_MATCHER
10182#include "AArch64GenAsmMatcher.inc"
10189 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
10191 unsigned DwarfRegNum =
MRI->getDwarfRegNum(Reg,
false);
10193 !
MRI->isReservedReg(MF, Reg))
10206 EVT VT =
Op.getValueType();
10222 EVT VT =
Op.getValueType();
10224 unsigned Depth =
Op.getConstantOperandVal(0);
10227 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
10244 if (Subtarget->hasPAuth()) {
10272 bool OptForSize)
const {
10273 bool IsLegal =
false;
10277 const APInt ImmInt = Imm.bitcastToAPInt();
10278 if (VT == MVT::f64)
10280 else if (VT == MVT::f32)
10282 else if (VT == MVT::f16 || VT == MVT::bf16)
10292 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
10300 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
10301 IsLegal =
Insn.size() <= Limit;
10305 <<
" imm value: "; Imm.dump(););
10317 if ((ST->hasNEON() &&
10318 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
10319 VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
10320 VT == MVT::v4f32)) ||
10322 (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
10331 return DAG.
getNode(Opcode,
SDLoc(Operand), VT, Operand);
10341 EVT VT =
Op.getValueType();
10348AArch64TargetLowering::getSqrtResultForDenormInput(
SDValue Op,
10357 bool Reciprocal)
const {
10361 DAG, ExtraSteps)) {
10366 Flags.setAllowReassociation(
true);
10370 for (
int i = ExtraSteps; i > 0; --i) {
10388 int &ExtraSteps)
const {
10391 DAG, ExtraSteps)) {
10396 Flags.setAllowReassociation(
true);
10400 for (
int i = ExtraSteps; i > 0; --i) {
10440const char *AArch64TargetLowering::LowerXConstraint(
EVT ConstraintVT)
const {
10448 if (!Subtarget->hasFPARMv8())
10464static std::optional<PredicateConstraint>
10467 .Case(
"Uph", PredicateConstraint::Uph)
10468 .
Case(
"Upl", PredicateConstraint::Upl)
10469 .
Case(
"Upa", PredicateConstraint::Upa)
10475 if (VT != MVT::aarch64svcount &&
10479 switch (Constraint) {
10480 case PredicateConstraint::Uph:
10481 return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
10482 : &AArch64::PPR_p8to15RegClass;
10483 case PredicateConstraint::Upl:
10484 return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
10485 : &AArch64::PPR_3bRegClass;
10486 case PredicateConstraint::Upa:
10487 return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
10488 : &AArch64::PPRRegClass;
10496static std::optional<ReducedGprConstraint>
10499 .Case(
"Uci", ReducedGprConstraint::Uci)
10500 .
Case(
"Ucj", ReducedGprConstraint::Ucj)
10509 switch (Constraint) {
10510 case ReducedGprConstraint::Uci:
10511 return &AArch64::MatrixIndexGPR32_8_11RegClass;
10512 case ReducedGprConstraint::Ucj:
10513 return &AArch64::MatrixIndexGPR32_12_15RegClass;
10554SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
10556 const AsmOperandInfo &OpInfo,
SelectionDAG &DAG)
const {
10561 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
10562 OpInfo.ConstraintVT.getSizeInBits() < 8)
10577 if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
10588AArch64TargetLowering::getConstraintType(
StringRef Constraint)
const {
10589 if (Constraint.
size() == 1) {
10590 switch (Constraint[0]) {
10627AArch64TargetLowering::getSingleConstraintMatchWeight(
10628 AsmOperandInfo &
info,
const char *constraint)
const {
10630 Value *CallOperandVal =
info.CallOperandVal;
10633 if (!CallOperandVal)
10637 switch (*constraint) {
10659std::pair<unsigned, const TargetRegisterClass *>
10660AArch64TargetLowering::getRegForInlineAsmConstraint(
10662 if (Constraint.
size() == 1) {
10663 switch (Constraint[0]) {
10666 return std::make_pair(0U,
nullptr);
10668 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
10670 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
10671 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
10673 if (!Subtarget->hasFPARMv8())
10677 return std::make_pair(0U, &AArch64::ZPRRegClass);
10678 return std::make_pair(0U,
nullptr);
10682 return std::make_pair(0U, &AArch64::FPR16RegClass);
10684 return std::make_pair(0U, &AArch64::FPR32RegClass);
10686 return std::make_pair(0U, &AArch64::FPR64RegClass);
10688 return std::make_pair(0U, &AArch64::FPR128RegClass);
10694 if (!Subtarget->hasFPARMv8())
10697 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
10699 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
10702 if (!Subtarget->hasFPARMv8())
10705 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
10711 return std::make_pair(0U, RegClass);
10715 return std::make_pair(0U, RegClass);
10717 if (
StringRef(
"{cc}").equals_insensitive(Constraint) ||
10719 return std::make_pair(
unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
10721 if (Constraint ==
"{za}") {
10722 return std::make_pair(
unsigned(AArch64::ZA), &AArch64::MPRRegClass);
10725 if (Constraint ==
"{zt0}") {
10726 return std::make_pair(
unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
10731 std::pair<unsigned, const TargetRegisterClass *> Res;
10736 unsigned Size = Constraint.
size();
10737 if ((
Size == 4 ||
Size == 5) && Constraint[0] ==
'{' &&
10738 tolower(Constraint[1]) ==
'v' && Constraint[
Size - 1] ==
'}') {
10741 if (!
Failed && RegNo >= 0 && RegNo <= 31) {
10746 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
10747 Res.second = &AArch64::FPR64RegClass;
10749 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
10750 Res.second = &AArch64::FPR128RegClass;
10756 if (Res.second && !Subtarget->hasFPARMv8() &&
10757 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
10758 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
10759 return std::make_pair(0U,
nullptr);
10766 bool AllowUnknown)
const {
10767 if (Subtarget->hasLS64() && Ty->
isIntegerTy(512))
10768 return EVT(MVT::i64x8);
10775void AArch64TargetLowering::LowerAsmOperandForConstraint(
10781 if (Constraint.
size() != 1)
10784 char ConstraintLetter = Constraint[0];
10785 switch (ConstraintLetter) {
10796 if (
Op.getValueType() == MVT::i64)
10797 Result = DAG.
getRegister(AArch64::XZR, MVT::i64);
10799 Result = DAG.
getRegister(AArch64::WZR, MVT::i32);
10808 dyn_cast<BlockAddressSDNode>(
Op)) {
10828 switch (ConstraintLetter) {
10836 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
10841 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
10842 CVal =
C->getSExtValue();
10869 if (!isUInt<32>(CVal))
10873 if ((CVal & 0xFFFF) == CVal)
10875 if ((CVal & 0xFFFF0000ULL) == CVal)
10878 if ((NCVal & 0xFFFFULL) == NCVal)
10880 if ((NCVal & 0xFFFF0000ULL) == NCVal)
10887 if ((CVal & 0xFFFFULL) == CVal)
10889 if ((CVal & 0xFFFF0000ULL) == CVal)
10891 if ((CVal & 0xFFFF00000000ULL) == CVal)
10893 if ((CVal & 0xFFFF000000000000ULL) == CVal)
10896 if ((NCVal & 0xFFFFULL) == NCVal)
10898 if ((NCVal & 0xFFFF0000ULL) == NCVal)
10900 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
10902 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
10916 Ops.push_back(Result);
10943 EVT EltType = V.getValueType().getVectorElementType();
10953 EVT VT =
Op.getValueType();
10955 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
10959 if (VT != MVT::v16i8 && VT != MVT::v8i8)
10963 assert((NumElts == 8 || NumElts == 16) &&
10964 "Need to have exactly 8 or 16 elements in vector.");
10970 for (
unsigned i = 0; i < NumElts; ++i) {
10975 SDValue OperandSourceVec = V.getOperand(0);
10977 SourceVec = OperandSourceVec;
10978 else if (SourceVec != OperandSourceVec)
10984 SDValue MaskSource = V.getOperand(1);
10986 if (!isa<ConstantSDNode>(MaskSource.
getOperand(1)))
10991 }
else if (!AndMaskConstants.
empty()) {
11005 if (!isa<ConstantSDNode>(MaskIdx) ||
11006 !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i))
11011 if (!MaskSourceVec) {
11015 }
else if (MaskSourceVec != MaskSource->
getOperand(0)) {
11029 if (!AndMaskConstants.
empty())
11035 DAG.
getConstant(Intrinsic::aarch64_neon_tbl1, dl, MVT::i32), SourceVec,
11044 LLVM_DEBUG(
dbgs() <<
"AArch64TargetLowering::ReconstructShuffle\n");
11046 EVT VT =
Op.getValueType();
11048 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
11051 struct ShuffleSourceInfo {
11066 ShuffleSourceInfo(
SDValue Vec)
11067 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
11068 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
11076 for (
unsigned i = 0; i < NumElts; ++i) {
11081 !isa<ConstantSDNode>(V.getOperand(1)) ||
11082 V.getOperand(0).getValueType().isScalableVector()) {
11084 dbgs() <<
"Reshuffle failed: "
11085 "a shuffle can only come from building a vector from "
11086 "various elements of other fixed-width vectors, provided "
11087 "their indices are constant\n");
11092 SDValue SourceVec = V.getOperand(0);
11093 auto Source =
find(Sources, SourceVec);
11094 if (Source == Sources.
end())
11095 Source = Sources.
insert(Sources.
end(), ShuffleSourceInfo(SourceVec));
11098 unsigned EltNo = V.getConstantOperandVal(1);
11099 Source->MinElt = std::min(Source->MinElt, EltNo);
11100 Source->MaxElt = std::max(Source->MaxElt, EltNo);
11105 if ((Sources.
size() == 3 || Sources.
size() == 4) && NumElts > 4) {
11110 for (
unsigned I = 0;
I < NumElts; ++
I) {
11113 for (
unsigned OF = 0; OF < OutputFactor; OF++)
11114 Mask.push_back(-1);
11120 unsigned Lane = V.getConstantOperandVal(1);
11121 for (
unsigned S = 0; S < Sources.
size(); S++) {
11122 if (V.getOperand(0) == Sources[S].Vec) {
11123 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
11124 unsigned InputBase = 16 * S + Lane * InputSize / 8;
11125 for (
unsigned OF = 0; OF < OutputFactor; OF++)
11126 Mask.push_back(InputBase + OF);
11136 ? Intrinsic::aarch64_neon_tbl3
11137 : Intrinsic::aarch64_neon_tbl4,
11139 for (
unsigned i = 0; i < Sources.
size(); i++) {
11140 SDValue Src = Sources[i].Vec;
11141 EVT SrcVT = Src.getValueType();
11144 "Expected a legally typed vector");
11152 for (
unsigned i = 0; i < Mask.size(); i++)
11154 assert((Mask.size() == 8 || Mask.size() == 16) &&
11155 "Expected a v8i8 or v16i8 Mask");
11157 DAG.
getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
11161 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
11165 if (Sources.
size() > 2) {
11166 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: currently only do something "
11167 <<
"sensible when at most two source vectors are "
11175 for (
auto &Source : Sources) {
11176 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
11177 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
11178 SmallestEltTy = SrcEltTy;
11181 unsigned ResMultiplier =
11190 for (
auto &Src : Sources) {
11191 EVT SrcVT = Src.ShuffleVec.getValueType();
11204 assert(2 * SrcVTSize == VTSize);
11209 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
11215 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
11219 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
11221 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
11225 if (Src.MinElt >= NumSrcElts) {
11230 Src.WindowBase = -NumSrcElts;
11231 }
else if (Src.MaxElt < NumSrcElts) {
11248 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
11249 "for SVE vectors.");
11256 Src.WindowBase = -Src.MinElt;
11263 for (
auto &Src : Sources) {
11264 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
11265 if (SrcEltTy == SmallestEltTy)
11276 Src.WindowBase *= Src.WindowScale;
11282 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
11289 if (Entry.isUndef())
11292 auto Src =
find(Sources, Entry.getOperand(0));
11293 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
11298 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
11301 int LanesDefined = BitsDefined / BitsPerShuffleLane;
11305 int *LaneMask = &Mask[i * ResMultiplier];
11307 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
11308 ExtractBase += NumElts * (Src - Sources.
begin());
11309 for (
int j = 0; j < LanesDefined; ++j)
11310 LaneMask[j] = ExtractBase + j;
11315 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
11320 for (
unsigned i = 0; i < Sources.
size(); ++i)
11333 dbgs() <<
"Reshuffle, creating node: "; V.dump(););
11352 unsigned ExpectedElt = Imm;
11353 for (
unsigned i = 1; i < NumElts; ++i) {
11357 if (ExpectedElt == NumElts)
11362 if (ExpectedElt !=
static_cast<unsigned>(M[i]))
11373 if (V.getValueType() != MVT::v16i8)
11375 assert(V.getNumOperands() == 16 &&
"Expected 16 operands on the BUILDVECTOR");
11377 for (
unsigned X = 0;
X < 4;
X++) {
11380 SDValue BaseExt = V.getOperand(
X * 4);
11384 !isa<ConstantSDNode>(BaseExt.
getOperand(1)) ||
11389 for (
unsigned Y = 1;
Y < 4;
Y++) {
11392 Ext.getOperand(0) !=
Base ||
11393 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
11394 Ext.getConstantOperandVal(1) !=
Y)
11405 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
11406 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
11408 if (V.getValueType() == MVT::v4i32)
11424 unsigned &DupLaneOp) {
11426 "Only possible block sizes for wide DUP are: 16, 32, 64");
11445 for (
size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
11446 for (
size_t I = 0;
I < NumEltsPerBlock;
I++) {
11447 int Elt = M[BlockIndex * NumEltsPerBlock +
I];
11451 if ((
unsigned)Elt >= SingleVecNumElements)
11453 if (BlockElts[
I] < 0)
11454 BlockElts[
I] = Elt;
11455 else if (BlockElts[
I] != Elt)
11464 auto FirstRealEltIter =
find_if(BlockElts, [](
int Elt) {
return Elt >= 0; });
11465 assert(FirstRealEltIter != BlockElts.
end() &&
11466 "Shuffle with all-undefs must have been caught by previous cases, "
11468 if (FirstRealEltIter == BlockElts.
end()) {
11474 size_t FirstRealIndex = FirstRealEltIter - BlockElts.
begin();
11476 if ((
unsigned)*FirstRealEltIter < FirstRealIndex)
11479 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
11482 if (Elt0 % NumEltsPerBlock != 0)
11486 for (
size_t I = 0;
I < NumEltsPerBlock;
I++)
11487 if (BlockElts[
I] >= 0 && (
unsigned)BlockElts[
I] != Elt0 +
I)
11490 DupLaneOp = Elt0 / NumEltsPerBlock;
11499 const int *FirstRealElt =
find_if(M, [](
int Elt) {
return Elt >= 0; });
11504 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1);
11507 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](
int Elt) {
11508 return Elt != ExpectedElt++ && Elt != -1;
11541 "Only possible block sizes for REV are: 16, 32, 64, 128");
11545 unsigned BlockElts = M[0] + 1;
11553 for (
unsigned i = 0; i < NumElts; ++i) {
11556 if ((
unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
11565 if (NumElts % 2 != 0)
11567 WhichResult = (M[0] == 0 ? 0 : 1);
11568 unsigned Idx = WhichResult * NumElts / 2;
11569 for (
unsigned i = 0; i != NumElts; i += 2) {
11570 if ((M[i] >= 0 && (
unsigned)M[i] !=
Idx) ||
11571 (M[i + 1] >= 0 && (
unsigned)M[i + 1] !=
Idx + NumElts))
11581 WhichResult = (M[0] == 0 ? 0 : 1);
11582 for (
unsigned i = 0; i != NumElts; ++i) {
11585 if ((
unsigned)M[i] != 2 * i + WhichResult)
11594 if (NumElts % 2 != 0)
11596 WhichResult = (M[0] == 0 ? 0 : 1);
11597 for (
unsigned i = 0; i < NumElts; i += 2) {
11598 if ((M[i] >= 0 && (
unsigned)M[i] != i + WhichResult) ||
11599 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != i + NumElts + WhichResult))
11610 if (NumElts % 2 != 0)
11612 WhichResult = (M[0] == 0 ? 0 : 1);
11613 unsigned Idx = WhichResult * NumElts / 2;
11614 for (
unsigned i = 0; i != NumElts; i += 2) {
11615 if ((M[i] >= 0 && (
unsigned)M[i] !=
Idx) ||
11629 WhichResult = (M[0] == 0 ? 0 : 1);
11630 for (
unsigned j = 0; j != 2; ++j) {
11631 unsigned Idx = WhichResult;
11632 for (
unsigned i = 0; i != Half; ++i) {
11633 int MIdx = M[i + j * Half];
11634 if (MIdx >= 0 && (
unsigned)MIdx !=
Idx)
11648 if (NumElts % 2 != 0)
11650 WhichResult = (M[0] == 0 ? 0 : 1);
11651 for (
unsigned i = 0; i < NumElts; i += 2) {
11652 if ((M[i] >= 0 && (
unsigned)M[i] != i + WhichResult) ||
11653 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != i + WhichResult))
11660 bool &DstIsLeft,
int &Anomaly) {
11661 if (M.size() !=
static_cast<size_t>(NumInputElements))
11664 int NumLHSMatch = 0, NumRHSMatch = 0;
11665 int LastLHSMismatch = -1, LastRHSMismatch = -1;
11667 for (
int i = 0; i < NumInputElements; ++i) {
11677 LastLHSMismatch = i;
11679 if (M[i] == i + NumInputElements)
11682 LastRHSMismatch = i;
11685 if (NumLHSMatch == NumInputElements - 1) {
11687 Anomaly = LastLHSMismatch;
11689 }
else if (NumRHSMatch == NumInputElements - 1) {
11691 Anomaly = LastRHSMismatch;
11704 for (
int I = 0,
E = NumElts / 2;
I !=
E;
I++) {
11709 int Offset = NumElts / 2;
11710 for (
int I = NumElts / 2,
E = NumElts;
I !=
E;
I++) {
11711 if (Mask[
I] !=
I + SplitLHS *
Offset)
11720 EVT VT =
Op.getValueType();
11755 unsigned OpNum = (PFEntry >> 26) & 0x0F;
11756 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
11757 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
11779 if (LHSID == (1 * 9 + 2) * 9 + 3)
11781 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 &&
"Illegal OP_COPY!");
11785 if (OpNum == OP_MOVLANE) {
11787 auto getPFIDLane = [](
unsigned ID,
int Elt) ->
int {
11788 assert(Elt < 4 &&
"Expected Perfect Lanes to be less than 4");
11794 return (
ID % 9 == 8) ? -1 :
ID % 9;
11803 assert(RHSID < 8 &&
"Expected a lane index for RHSID!");
11804 unsigned ExtLane = 0;
11810 int MaskElt = getPFIDLane(
ID, (RHSID & 0x01) << 1) >> 1;
11812 MaskElt = (getPFIDLane(
ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
11813 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
11814 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
11815 Input = MaskElt < 2 ? V1 : V2;
11821 "Expected 16 or 32 bit shuffle elemements");
11826 int MaskElt = getPFIDLane(
ID, RHSID);
11827 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
11828 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
11829 Input = MaskElt < 4 ? V1 : V2;
11831 if (VT == MVT::v4i16) {
11874 if (EltTy == MVT::i8)
11876 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
11878 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
11880 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
11888 return DAG.
getNode(Opcode, dl, VT, OpLHS, Lane);
11919 EVT EltVT =
Op.getValueType().getVectorElementType();
11931 bool IsUndefOrZero = V2.isUndef() ||
isZerosVector(V2.getNode());
11932 MVT IndexVT = MVT::v8i8;
11933 unsigned IndexLen = 8;
11934 if (
Op.getValueSizeInBits() == 128) {
11935 IndexVT = MVT::v16i8;
11940 for (
int Val : ShuffleMask) {
11941 for (
unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
11942 unsigned Offset = Byte + Val * BytesPerElt;
11945 if (IsUndefOrZero &&
Offset >= IndexLen)
11955 if (IsUndefOrZero) {
11960 DAG.
getConstant(Intrinsic::aarch64_neon_tbl1,
DL, MVT::i32), V1Cst,
11963 if (IndexLen == 8) {
11967 DAG.
getConstant(Intrinsic::aarch64_neon_tbl1,
DL, MVT::i32), V1Cst,
11978 DAG.
getConstant(Intrinsic::aarch64_neon_tbl2,
DL, MVT::i32), V1Cst,
11987 if (EltType == MVT::i8)
11989 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
11991 if (EltType == MVT::i32 || EltType == MVT::f32)
11993 if (EltType == MVT::i64 || EltType == MVT::f64)
12002 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
12013 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
12015 if (ExtIdxInBits % CastedEltBitWidth != 0)
12023 LaneC += ExtIdxInBits / CastedEltBitWidth;
12030 unsigned SrcVecNumElts =
12037 if (getScaledOffsetDup(V, Lane, CastVT)) {
12038 V = DAG.
getBitcast(CastVT, V.getOperand(0).getOperand(0));
12040 V.getOperand(0).getValueType().is128BitVector()) {
12043 Lane += V.getConstantOperandVal(1);
12044 V = V.getOperand(0);
12064 if (NumElts % 2 != 0)
12068 for (
unsigned i = 0; i < NumElts; i += 2) {
12073 if (
M0 == -1 &&
M1 == -1) {
12078 if (
M0 == -1 &&
M1 != -1 && (
M1 % 2) == 1) {
12083 if (
M0 != -1 && (
M0 % 2) == 0 && ((
M0 + 1) ==
M1 ||
M1 == -1)) {
12092 assert(NewMask.
size() == NumElts / 2 &&
"Incorrect size for mask!");
12108 EVT VT =
Op.getValueType();
12118 if (ElementSize > 32 || ElementSize == 1)
12148 EVT VT =
Op.getValueType();
12162 for (
unsigned I = 0;
I < 16;
I++) {
12163 if (ShuffleMask[
I] < 16)
12167 dyn_cast<ConstantSDNode>(Mask2->
getOperand(ShuffleMask[
I] - 16));
12170 TBLMaskParts[
I] = DAG.
getConstant(
C->getSExtValue() + 32, dl, MVT::i32);
12187AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(
SDValue Op,
12190 EVT VT =
Op.getValueType();
12194 "Unexpected extension factor.");
12207 EVT VT =
Op.getValueType();
12212 return LowerFixedLengthVECTOR_SHUFFLEToSVE(
Op, DAG);
12225 "Unexpected VECTOR_SHUFFLE mask size!");
12251 for (
unsigned LaneSize : {64U, 32U, 16U}) {
12263 V1 =
constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
12284 bool ReverseEXT =
false;
12286 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
12298 unsigned WhichResult;
12299 if (
isZIPMask(ShuffleMask, VT, WhichResult)) {
12303 if (
isUZPMask(ShuffleMask, VT, WhichResult)) {
12307 if (
isTRNMask(ShuffleMask, VT, WhichResult)) {
12331 if (
isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
12336 int SrcLane = ShuffleMask[Anomaly];
12337 if (SrcLane >= NumInputElements) {
12346 ScalarVT = MVT::i32;
12360 if (NumElts == 4) {
12361 unsigned PFIndexes[4];
12362 for (
unsigned i = 0; i != 4; ++i) {
12363 if (ShuffleMask[i] < 0)
12366 PFIndexes[i] = ShuffleMask[i];
12370 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
12371 PFIndexes[2] * 9 + PFIndexes[3];
12382 EVT VT =
Op.getValueType();
12385 return LowerToScalableOp(
Op, DAG);
12388 "Unexpected vector type!");
12391 if (isa<ConstantSDNode>(
Op.getOperand(0)))
12403 if (VT == MVT::nxv1i1)
12415 EVT VT =
Op.getValueType();
12427 auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
12428 if (CIdx && (CIdx->getZExtValue() <= 3)) {
12458 APInt &UndefBits) {
12460 APInt SplatBits, SplatUndef;
12461 unsigned SplatBitSize;
12463 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
12466 for (
unsigned i = 0; i < NumSplats; ++i) {
12467 CnstBits <<= SplatBitSize;
12468 UndefBits <<= SplatBitSize;
12470 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
12481 const APInt &Bits) {
12482 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
12484 EVT VT =
Op.getValueType();
12503 const SDValue *LHS =
nullptr) {
12504 EVT VT =
Op.getValueType();
12509 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
12512 bool isAdvSIMDModImm =
false;
12532 if (isAdvSIMDModImm) {
12537 Mov = DAG.
getNode(NewOp, dl, MovTy,
12542 Mov = DAG.
getNode(NewOp, dl, MovTy,
12556 const SDValue *LHS =
nullptr) {
12557 EVT VT =
Op.getValueType();
12562 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
12565 bool isAdvSIMDModImm =
false;
12577 if (isAdvSIMDModImm) {
12582 Mov = DAG.
getNode(NewOp, dl, MovTy,
12587 Mov = DAG.
getNode(NewOp, dl, MovTy,
12601 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
12603 EVT VT =
Op.getValueType();
12605 bool isAdvSIMDModImm =
false;
12617 if (isAdvSIMDModImm) {
12631 const APInt &Bits) {
12632 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
12634 EVT VT =
Op.getValueType();
12652 const APInt &Bits) {
12653 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
12655 EVT VT =
Op.getValueType();
12658 bool isAdvSIMDModImm =
false;
12662 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
12667 MovTy = MVT::v2f64;
12670 if (isAdvSIMDModImm) {
12694 for (
unsigned i = 1; i < NumElts; ++i)
12695 if (dyn_cast<ConstantSDNode>(Bvec->
getOperand(i)) != FirstElt)
12704 N =
N.getOperand(0);
12710 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
12714 N =
N.getOperand(0);
12717 if (
N.getValueType().getVectorMinNumElements() < NumElts)
12728 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
12729 return N.getValueType().getVectorMinNumElements() >= NumElts;
12736 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
12737 if (MaxSVESize && MinSVESize == MaxSVESize) {
12739 unsigned PatNumElts =
12741 return PatNumElts == (NumElts * VScale);
12755 EVT VT =
N->getValueType(0);
12765 SDValue FirstOp =
N->getOperand(0);
12766 unsigned FirstOpc = FirstOp.
getOpcode();
12767 SDValue SecondOp =
N->getOperand(1);
12768 unsigned SecondOpc = SecondOp.
getOpcode();
12799 if (ShiftHasPredOp) {
12805 C2 =
C.getZExtValue();
12807 dyn_cast<ConstantSDNode>(Shift.
getOperand(1)))
12808 C2 = C2node->getZExtValue();
12822 assert(C1nodeImm && C1nodeShift);
12824 C1AsAPInt = C1AsAPInt.
zextOrTrunc(ElemSizeInBits);
12830 if (C2 > ElemSizeInBits)
12835 if (C1AsAPInt != RequiredC1)
12859 return LowerToScalableOp(
Op, DAG);
12865 EVT VT =
Op.getValueType();
12871 dyn_cast<BuildVectorSDNode>(
Op.getOperand(1).getNode());
12874 LHS =
Op.getOperand(1);
12875 BVN = dyn_cast<BuildVectorSDNode>(
Op.getOperand(0).getNode());
12892 UndefBits, &LHS)) ||
12908 EVT VT =
Op.getValueType();
12920 if (
auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
12922 CstLane->getZExtValue());
12924 }
else if (Lane.getNode()->isUndef()) {
12927 assert(Lane.getValueType() == MVT::i32 &&
12928 "Unexpected BUILD_VECTOR operand type");
12936 EVT VT =
Op.getValueType();
12951 DefBits = ~DefBits;
12957 DefBits = UndefBits;
12966 DefBits = ~UndefBits;
12978 EVT VT =
Op.getValueType();
12981 if (
auto SeqInfo = cast<BuildVectorSDNode>(
Op)->isConstantSequence()) {
13010 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
13011 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
13015 if (
Const->isZero() && !
Const->isNegative())
13036 bool isOnlyLowElement =
true;
13037 bool usesOnlyOneValue =
true;
13038 bool usesOnlyOneConstantValue =
true;
13040 bool AllLanesExtractElt =
true;
13041 unsigned NumConstantLanes = 0;
13042 unsigned NumDifferentLanes = 0;
13043 unsigned NumUndefLanes = 0;
13047 unsigned ConsecutiveValCount = 0;
13049 for (
unsigned i = 0; i < NumElts; ++i) {
13052 AllLanesExtractElt =
false;
13058 isOnlyLowElement =
false;
13063 ++NumConstantLanes;
13064 if (!ConstantValue.
getNode())
13066 else if (ConstantValue != V)
13067 usesOnlyOneConstantValue =
false;
13070 if (!
Value.getNode())
13072 else if (V !=
Value) {
13073 usesOnlyOneValue =
false;
13074 ++NumDifferentLanes;
13077 if (PrevVal != V) {
13078 ConsecutiveValCount = 0;
13093 DifferentValueMap[
V] = ++ConsecutiveValCount;
13096 if (!
Value.getNode()) {
13098 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
13106 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
13107 "SCALAR_TO_VECTOR node\n");
13111 if (AllLanesExtractElt) {
13117 for (
unsigned i = 0; i < NumElts; ++i) {
13120 if (!isa<ConstantSDNode>(
N->getOperand(1))) {
13143 uint64_t Val =
N->getConstantOperandVal(1);
13144 if (Val == 2 * i) {
13148 if (Val - 1 == 2 * i) {
13177 if (usesOnlyOneValue) {
13180 Value.getValueType() != VT) {
13182 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
13190 if (
Value.getValueSizeInBits() == 64) {
13192 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
13204 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
13205 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
13207 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
13208 "BITCASTS, and try again\n");
13210 for (
unsigned i = 0; i < NumElts; ++i)
13214 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
13216 Val = LowerBUILD_VECTOR(Val, DAG);
13226 bool PreferDUPAndInsert =
13228 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
13229 NumDifferentLanes >= NumConstantLanes;
13235 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
13239 APInt ConstantValueAPInt(1, 0);
13240 if (
auto *
C = dyn_cast<ConstantSDNode>(ConstantValue))
13241 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
13243 !ConstantValueAPInt.isAllOnes()) {
13251 for (
unsigned i = 0; i < NumElts; ++i) {
13265 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
13277 if (NumElts >= 4) {
13285 if (PreferDUPAndInsert) {
13290 for (
unsigned I = 0;
I < NumElts; ++
I)
13301 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
13313 bool canUseVECTOR_CONCAT =
true;
13314 for (
auto Pair : DifferentValueMap) {
13316 if (Pair.second != NumElts / 2)
13317 canUseVECTOR_CONCAT =
false;
13330 if (canUseVECTOR_CONCAT) {
13353 if (NumElts >= 8) {
13356 SDValue FirstLaneVal =
Op.getOperand(0);
13357 for (
unsigned i = 0; i < NumElts; ++i) {
13359 if (FirstLaneVal == Val)
13383 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
13384 "of INSERT_VECTOR_ELT\n");
13401 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
13406 <<
"Creating nodes for the other vector elements:\n";);
13407 for (; i < NumElts; ++i) {
13418 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
13419 "better alternative\n");
13427 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
13429 assert(
Op.getValueType().isScalableVector() &&
13431 "Expected legal scalable vector type!");
13436 "Unexpected number of operands in CONCAT_VECTORS");
13438 if (NumOperands == 2)
13443 while (ConcatOps.size() > 1) {
13444 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
13452 ConcatOps.resize(ConcatOps.size() / 2);
13454 return ConcatOps[0];
13466 return LowerFixedLengthInsertVectorElt(
Op, DAG);
13468 EVT VT =
Op.getOperand(0).getValueType();
13482 ExtendedValue,
Op.getOperand(2));
13495AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
13498 EVT VT =
Op.getOperand(0).getValueType();
13507 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
13509 Extend,
Op.getOperand(1));
13514 return LowerFixedLengthExtractVectorElt(
Op, DAG);
13522 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
13523 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
13524 VT == MVT::v8f16 || VT == MVT::v8bf16)
13527 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
13528 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
13539 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
13549 assert(
Op.getValueType().isFixedLengthVector() &&
13550 "Only cases that extract a fixed length vector are supported!");
13552 EVT InVT =
Op.getOperand(0).getValueType();
13553 unsigned Idx =
Op.getConstantOperandVal(1);
13554 unsigned Size =
Op.getValueSizeInBits();
13595 assert(
Op.getValueType().isScalableVector() &&
13596 "Only expect to lower inserts into scalable vectors!");
13598 EVT InVT =
Op.getOperand(1).getValueType();
13599 unsigned Idx =
Op.getConstantOperandVal(2);
13604 EVT VT =
Op.getValueType();
13620 if (
Idx < (NumElts / 2)) {
13644 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
13645 Vec1 = getSVESafeBitCast(WideVT, Vec1, DAG);
13660 "Invalid subvector index!");
13665 return getSVESafeBitCast(VT, Narrow, DAG);
13673 std::optional<unsigned> PredPattern =
13695 !isa<ConstantSDNode>(
Op->getOperand(0)))
13698 SplatVal =
Op->getConstantOperandVal(0);
13699 if (
Op.getValueType().getVectorElementType() != MVT::i64)
13700 SplatVal = (int32_t)SplatVal;
13708 SplatVal = -SplatVal;
13716 EVT VT =
Op.getValueType();
13720 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
13740 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
13741 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
13746 if (VT == MVT::nxv16i8)
13747 WidenedVT = MVT::nxv8i16;
13748 else if (VT == MVT::nxv8i16)
13749 WidenedVT = MVT::nxv4i32;
13759 SDValue ResultLo = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
13760 SDValue ResultHi = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
13778 unsigned DummyUnsigned;
13782 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
13805 Op =
Op.getOperand(0);
13807 APInt SplatBits, SplatUndef;
13808 unsigned SplatBitSize;
13810 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
13811 HasAnyUndefs, ElementBits) ||
13812 SplatBitSize > ElementBits)
13823 assert(VT.
isVector() &&
"vector shift count is not a vector type");
13827 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
13834 assert(VT.
isVector() &&
"vector shift count is not a vector type");
13838 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
13843 EVT VT =
Op.getValueType();
13848 EVT OpVT =
Op.getOperand(0).getValueType();
13860 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
13867 EVT VT =
Op.getValueType();
13871 if (!
Op.getOperand(1).getValueType().isVector())
13875 switch (
Op.getOpcode()) {
13881 if (
isVShiftLImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize)
13887 Op.getOperand(0),
Op.getOperand(1));
13894 return LowerToPredicatedOp(
Op, DAG, Opc);
13898 if (
isVShiftRImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize) {
13901 return DAG.
getNode(Opc,
DL, VT,
Op.getOperand(0),
13908 unsigned Opc = (
Op.getOpcode() ==
ISD::SRA) ? Intrinsic::aarch64_neon_sshl
13909 : Intrinsic::aarch64_neon_ushl;
13917 return NegShiftLeft;
13926 EVT SrcVT =
LHS.getValueType();
13928 "function only supposed to emit natural comparisons");
13932 unsigned SplatBitSize = 0;
13937 SplatBitSize, HasAnyUndefs);
13939 bool IsZero = IsCnst && SplatValue == 0;
13942 bool IsMinusOne = IsCnst && SplatValue.
isAllOnes();
13954 return DAG.
getNOT(dl, Fcmeq, VT);
13998 return DAG.
getNOT(dl, Cmeq, VT);
14037 if (
Op.getValueType().isScalableVector())
14042 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
14047 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
14050 if (
LHS.getValueType().getVectorElementType().isInteger()) {
14062 if (!FullFP16 &&
LHS.getValueType().getVectorElementType() == MVT::f16) {
14063 if (
LHS.getValueType().getVectorNumElements() == 4) {
14068 CmpVT = MVT::v4i32;
14073 assert((!FullFP16 &&
LHS.getValueType().getVectorElementType() != MVT::f16) ||
14074 LHS.getValueType().getVectorElementType() != MVT::f128);
14085 if (!
Cmp.getNode())
14115 unsigned ScalarOpcode;
14133 "Expected power-of-2 length vector");
14141 if (ElemVT == MVT::i1) {
14143 if (NumElems > 16) {
14146 EVT HalfVT =
Lo.getValueType();
14158 unsigned ExtendOp =
14162 switch (ScalarOpcode) {
14183 VecVT =
Lo.getValueType();
14199 for (
unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
14204 Scalar = DAG.
getNode(ScalarOpcode,
DL, ScalarVT, Scalar, Shifted);
14218 EVT SrcVT = Src.getValueType();
14231 return LowerPredReductionToSVE(
Op, DAG);
14233 switch (
Op.getOpcode()) {
14267 switch (
Op.getOpcode()) {
14272 Op.getValueType(), dl, DAG);
14292 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
14297 MVT VT =
Op.getSimpleValueType();
14298 assert(VT != MVT::i128 &&
"Handled elsewhere, code replicated.");
14303 Op.getOperand(0),
Op.getOperand(1), RHS,
14308AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op,
14317 cast<ConstantSDNode>(
Op.getOperand(2))->getMaybeAlignValue();
14318 EVT VT =
Node->getValueType(0);
14321 "no-stack-arg-probe")) {
14329 SDValue Ops[2] = {SP, Chain};
14349 Chain, Callee, DAG.
getRegister(AArch64::X15, MVT::i64),
14369 SDValue Ops[2] = {SP, Chain};
14374AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(
SDValue Op,
14382 cast<ConstantSDNode>(
Op.getOperand(2))->getMaybeAlignValue();
14384 EVT VT =
Node->getValueType(0);
14396 SDValue Ops[2] = {SP, Chain};
14401AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(
SDValue Op,
14406 return LowerWindowsDYNAMIC_STACKALLOC(
Op, DAG);
14408 return LowerInlineDYNAMIC_STACKALLOC(
Op, DAG);
14421 unsigned NewOp)
const {
14422 if (Subtarget->hasSVE2())
14423 return LowerToPredicatedOp(
Op, DAG, NewOp);
14428 EVT VT =
Op.getValueType();
14447 if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) ||
14448 (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) {
14452 return DAG.
getNode(ShiftOpc, dl, VT,
Add, ConstantOne);
14455 SDValue ShiftOpA = DAG.
getNode(ShiftOpc, dl, VT, OpA, ConstantOne);
14456 SDValue ShiftOpB = DAG.
getNode(ShiftOpc, dl, VT, OpB, ConstantOne);
14466 EVT VT =
Op.getValueType();
14467 assert(VT != MVT::i64 &&
"Expected illegal VSCALE node");
14470 APInt MulImm =
Op.getConstantOperandAPInt(0);
14476template <
unsigned NumVecs>
14486 for (
unsigned I = 0;
I < NumVecs; ++
I)
14495 Info.align.reset();
14506 unsigned Intrinsic)
const {
14507 auto &
DL =
I.getModule()->getDataLayout();
14508 switch (Intrinsic) {
14509 case Intrinsic::aarch64_sve_st2:
14510 return setInfoSVEStN<2>(*
this,
DL,
Info,
I);
14511 case Intrinsic::aarch64_sve_st3:
14512 return setInfoSVEStN<3>(*
this,
DL,
Info,
I);
14513 case Intrinsic::aarch64_sve_st4:
14514 return setInfoSVEStN<4>(*
this,
DL,
Info,
I);
14515 case Intrinsic::aarch64_neon_ld2:
14516 case Intrinsic::aarch64_neon_ld3:
14517 case Intrinsic::aarch64_neon_ld4:
14518 case Intrinsic::aarch64_neon_ld1x2:
14519 case Intrinsic::aarch64_neon_ld1x3:
14520 case Intrinsic::aarch64_neon_ld1x4: {
14522 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
14524 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
14526 Info.align.reset();
14531 case Intrinsic::aarch64_neon_ld2lane:
14532 case Intrinsic::aarch64_neon_ld3lane:
14533 case Intrinsic::aarch64_neon_ld4lane:
14534 case Intrinsic::aarch64_neon_ld2r:
14535 case Intrinsic::aarch64_neon_ld3r:
14536 case Intrinsic::aarch64_neon_ld4r: {
14540 auto *StructTy = cast<StructType>(
RetTy);
14541 unsigned NumElts = StructTy->getNumElements();
14542 Type *VecTy = StructTy->getElementType(0);
14545 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
14547 Info.align.reset();
14552 case Intrinsic::aarch64_neon_st2:
14553 case Intrinsic::aarch64_neon_st3:
14554 case Intrinsic::aarch64_neon_st4:
14555 case Intrinsic::aarch64_neon_st1x2:
14556 case Intrinsic::aarch64_neon_st1x3:
14557 case Intrinsic::aarch64_neon_st1x4: {
14559 unsigned NumElts = 0;
14560 for (
const Value *Arg :
I.args()) {
14561 Type *ArgTy = Arg->getType();
14564 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
14567 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
14569 Info.align.reset();
14574 case Intrinsic::aarch64_neon_st2lane:
14575 case Intrinsic::aarch64_neon_st3lane:
14576 case Intrinsic::aarch64_neon_st4lane: {
14578 unsigned NumElts = 0;
14580 Type *VecTy =
I.getArgOperand(0)->getType();
14583 for (
const Value *Arg :
I.args()) {
14584 Type *ArgTy = Arg->getType();
14591 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
14593 Info.align.reset();
14598 case Intrinsic::aarch64_ldaxr:
14599 case Intrinsic::aarch64_ldxr: {
14600 Type *ValTy =
I.getParamElementType(0);
14603 Info.ptrVal =
I.getArgOperand(0);
14605 Info.align =
DL.getABITypeAlign(ValTy);
14609 case Intrinsic::aarch64_stlxr:
14610 case Intrinsic::aarch64_stxr: {
14611 Type *ValTy =
I.getParamElementType(1);
14614 Info.ptrVal =
I.getArgOperand(1);
14616 Info.align =
DL.getABITypeAlign(ValTy);
14620 case Intrinsic::aarch64_ldaxp:
14621 case Intrinsic::aarch64_ldxp:
14623 Info.memVT = MVT::i128;
14624 Info.ptrVal =
I.getArgOperand(0);
14629 case Intrinsic::aarch64_stlxp:
14630 case Intrinsic::aarch64_stxp:
14632 Info.memVT = MVT::i128;
14633 Info.ptrVal =
I.getArgOperand(2);
14638 case Intrinsic::aarch64_sve_ldnt1: {
14639 Type *ElTy = cast<VectorType>(
I.getType())->getElementType();
14642 Info.ptrVal =
I.getArgOperand(1);
14644 Info.align =
DL.getABITypeAlign(ElTy);
14648 case Intrinsic::aarch64_sve_stnt1: {
14650 cast<VectorType>(
I.getArgOperand(0)->getType())->getElementType();
14653 Info.ptrVal =
I.getArgOperand(2);
14655 Info.align =
DL.getABITypeAlign(ElTy);
14659 case Intrinsic::aarch64_mops_memset_tag: {
14660 Value *Dst =
I.getArgOperand(0);
14661 Value *Val =
I.getArgOperand(1);
14666 Info.align =
I.getParamAlign(0).valueOrOne();
14692 MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
14697 Base.getOperand(1).hasOneUse() &&
14704 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
14706 if (ShiftAmount ==
Log2_32(LoadBytes))
14716 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->
use_size()) {
14735 return NumBits1 > NumBits2;
14742 return NumBits1 > NumBits2;
14749 if (
I->getOpcode() != Instruction::FMul)
14752 if (!
I->hasOneUse())
14757 if (!(
User->getOpcode() == Instruction::FSub ||
14758 User->getOpcode() == Instruction::FAdd))
14779 return NumBits1 == 32 && NumBits2 == 64;
14786 return NumBits1 == 32 && NumBits2 == 64;
14804bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *Ext)
const {
14805 if (isa<FPExtInst>(Ext))
14809 if (Ext->getType()->isVectorTy())
14812 for (
const Use &U : Ext->uses()) {
14817 const Instruction *Instr = cast<Instruction>(U.getUser());
14820 switch (Instr->getOpcode()) {
14821 case Instruction::Shl:
14822 if (!isa<ConstantInt>(Instr->getOperand(1)))
14825 case Instruction::GetElementPtr: {
14827 auto &
DL = Ext->getModule()->getDataLayout();
14828 std::advance(GTI, U.getOperandNo()-1);
14841 if (ShiftAmt == 0 || ShiftAmt > 4)
14845 case Instruction::Trunc:
14848 if (Instr->getType() == Ext->getOperand(0)->getType())
14862 if (
auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
14863 return all_equal(Shuf->getShuffleMask());
14870 bool AllowSplat =
false) {
14871 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
14872 auto *FullTy = FullV->
getType();
14873 auto *HalfTy = HalfV->getType();
14875 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
14878 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
14879 auto *FullVT = cast<FixedVectorType>(FullV->
getType());
14880 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
14881 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
14885 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
14899 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
14900 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
14907 int NumElements = cast<FixedVectorType>(Op1->
getType())->getNumElements() * 2;
14914 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
14915 (M2Start != 0 && M2Start != (NumElements / 2)))
14917 if (S1Op1 && S2Op1 && M1Start != M2Start)
14927 return Ext->getType()->getScalarSizeInBits() ==
14928 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
14933 !areExtDoubled(cast<Instruction>(Ext1)) ||
14934 !areExtDoubled(cast<Instruction>(Ext2)))
14942 Value *VectorOperand =
nullptr;
14947 isa<FixedVectorType>(VectorOperand->
getType()) &&
14948 cast<FixedVectorType>(VectorOperand->
getType())->getNumElements() == 2;
14958 auto *
GEP = dyn_cast<GetElementPtrInst>(Ptrs);
14959 if (!
GEP ||
GEP->getNumOperands() != 2)
14963 Value *Offsets =
GEP->getOperand(1);
14966 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
14970 if (isa<SExtInst>(Offsets) || isa<ZExtInst>(Offsets)) {
14971 auto *OffsetsInst = cast<Instruction>(Offsets);
14972 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
14973 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
14988 Ops.
push_back(&cast<Instruction>(
Op)->getOperandUse(0));
15000 switch (II->getIntrinsicID()) {
15001 case Intrinsic::aarch64_neon_smull:
15002 case Intrinsic::aarch64_neon_umull:
15011 case Intrinsic::fma:
15012 if (isa<VectorType>(
I->getType()) &&
15013 cast<VectorType>(
I->getType())->getElementType()->isHalfTy() &&
15014 !Subtarget->hasFullFP16())
15017 case Intrinsic::aarch64_neon_sqdmull:
15018 case Intrinsic::aarch64_neon_sqdmulh:
15019 case Intrinsic::aarch64_neon_sqrdmulh:
15025 return !Ops.
empty();
15026 case Intrinsic::aarch64_neon_fmlal:
15027 case Intrinsic::aarch64_neon_fmlal2:
15028 case Intrinsic::aarch64_neon_fmlsl:
15029 case Intrinsic::aarch64_neon_fmlsl2:
15035 return !Ops.
empty();
15036 case Intrinsic::aarch64_sve_ptest_first:
15037 case Intrinsic::aarch64_sve_ptest_last:
15038 if (
auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
15039 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
15041 return !Ops.
empty();
15042 case Intrinsic::aarch64_sme_write_horiz:
15043 case Intrinsic::aarch64_sme_write_vert:
15044 case Intrinsic::aarch64_sme_writeq_horiz:
15045 case Intrinsic::aarch64_sme_writeq_vert: {
15046 auto *
Idx = dyn_cast<Instruction>(II->getOperand(1));
15047 if (!
Idx ||
Idx->getOpcode() != Instruction::Add)
15052 case Intrinsic::aarch64_sme_read_horiz:
15053 case Intrinsic::aarch64_sme_read_vert:
15054 case Intrinsic::aarch64_sme_readq_horiz:
15055 case Intrinsic::aarch64_sme_readq_vert:
15056 case Intrinsic::aarch64_sme_ld1b_vert:
15057 case Intrinsic::aarch64_sme_ld1h_vert:
15058 case Intrinsic::aarch64_sme_ld1w_vert:
15059 case Intrinsic::aarch64_sme_ld1d_vert:
15060 case Intrinsic::aarch64_sme_ld1q_vert:
15061 case Intrinsic::aarch64_sme_st1b_vert:
15062 case Intrinsic::aarch64_sme_st1h_vert:
15063 case Intrinsic::aarch64_sme_st1w_vert:
15064 case Intrinsic::aarch64_sme_st1d_vert:
15065 case Intrinsic::aarch64_sme_st1q_vert:
15066 case Intrinsic::aarch64_sme_ld1b_horiz:
15067 case Intrinsic::aarch64_sme_ld1h_horiz:
15068 case Intrinsic::aarch64_sme_ld1w_horiz:
15069 case Intrinsic::aarch64_sme_ld1d_horiz:
15070 case Intrinsic::aarch64_sme_ld1q_horiz:
15071 case Intrinsic::aarch64_sme_st1b_horiz:
15072 case Intrinsic::aarch64_sme_st1h_horiz:
15073 case Intrinsic::aarch64_sme_st1w_horiz:
15074 case Intrinsic::aarch64_sme_st1d_horiz:
15075 case Intrinsic::aarch64_sme_st1q_horiz: {
15076 auto *
Idx = dyn_cast<Instruction>(II->getOperand(3));
15077 if (!
Idx ||
Idx->getOpcode() != Instruction::Add)
15082 case Intrinsic::aarch64_neon_pmull:
15088 case Intrinsic::aarch64_neon_pmull64:
15090 II->getArgOperand(1)))
15092 Ops.
push_back(&II->getArgOperandUse(0));
15093 Ops.
push_back(&II->getArgOperandUse(1));
15095 case Intrinsic::masked_gather:
15098 Ops.
push_back(&II->getArgOperandUse(0));
15100 case Intrinsic::masked_scatter:
15103 Ops.
push_back(&II->getArgOperandUse(1));
15111 switch (
I->getOpcode()) {
15112 case Instruction::GetElementPtr:
15113 case Instruction::Add:
15114 case Instruction::Sub:
15115 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
15126 if (!
I->getType()->isVectorTy())
15129 switch (
I->getOpcode()) {
15130 case Instruction::Sub:
15131 case Instruction::Add: {
15137 auto Ext1 = cast<Instruction>(
I->getOperand(0));
15138 auto Ext2 = cast<Instruction>(
I->getOperand(1));
15140 Ops.
push_back(&Ext1->getOperandUse(0));
15141 Ops.
push_back(&Ext2->getOperandUse(0));
15149 case Instruction::Or: {
15152 if (Subtarget->hasNEON()) {
15159 if (
match(OtherAnd,
15162 ? cast<Instruction>(
I->getOperand(1))
15163 : cast<Instruction>(
I->getOperand(0));
15166 if (
I->getParent() != MainAnd->
getParent() ||
15171 if (
I->getParent() != IA->getParent() ||
15172 I->getParent() != IB->getParent())
15186 case Instruction::Mul: {
15187 int NumZExts = 0, NumSExts = 0;
15188 for (
auto &
Op :
I->operands()) {
15190 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
15225 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
15230 dyn_cast<ConstantInt>(Insert->getOperand(2));
15232 if (!ElementConstant || !ElementConstant->
isZero())
15235 unsigned Opcode = OperandInstr->
getOpcode();
15236 if (Opcode == Instruction::SExt)
15238 else if (Opcode == Instruction::ZExt)
15243 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
15245 const DataLayout &
DL =
I->getFunction()->getParent()->getDataLayout();
15256 return !Ops.
empty() && (NumSExts == 2 || NumZExts == 2);
15265 bool IsLittleEndian) {
15267 auto *SrcTy = cast<FixedVectorType>(
Op->getType());
15268 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15269 auto DstWidth = cast<IntegerType>(DstTy->
getElementType())->getBitWidth();
15270 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
15273 assert(DstWidth % SrcWidth == 0 &&
15274 "TBL lowering is not supported for a ZExt instruction with this "
15275 "source & destination element type.");
15276 unsigned ZExtFactor = DstWidth / SrcWidth;
15277 unsigned NumElts = SrcTy->getNumElements();
15283 for (
unsigned i = 0; i < NumElts * ZExtFactor; i++) {
15284 if (IsLittleEndian) {
15285 if (i % ZExtFactor == 0)
15286 Mask.push_back(i / ZExtFactor);
15288 Mask.push_back(NumElts);
15290 if ((i + 1) % ZExtFactor == 0)
15291 Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
15293 Mask.push_back(NumElts);
15301 if (DstTy != ZExt->
getType())
15311 int NumElements = cast<FixedVectorType>(TI->
getType())->getNumElements();
15313 auto *DstTy = cast<FixedVectorType>(TI->
getType());
15314 assert(SrcTy->getElementType()->isIntegerTy() &&
15315 "Non-integer type source vector element is not supported");
15316 assert(DstTy->getElementType()->isIntegerTy(8) &&
15317 "Unsupported destination vector element type");
15318 unsigned SrcElemTySz =
15319 cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15320 unsigned DstElemTySz =
15321 cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15322 assert((SrcElemTySz % DstElemTySz == 0) &&
15323 "Cannot lower truncate to tbl instructions for a source element size "
15324 "that is not divisible by the destination element size");
15325 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
15326 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
15327 "Unsupported source vector element type size");
15335 for (
int Itr = 0; Itr < 16; Itr++) {
15336 if (Itr < NumElements)
15338 IsLittleEndian ? Itr * TruncFactor
15339 : Itr * TruncFactor + (TruncFactor - 1)));
15344 int MaxTblSz = 128 * 4;
15345 int MaxSrcSz = SrcElemTySz * NumElements;
15347 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
15348 assert(ElemsPerTbl <= 16 &&
15349 "Maximum elements selected using TBL instruction cannot exceed 16!");
15351 int ShuffleCount = 128 / SrcElemTySz;
15353 for (
int i = 0; i < ShuffleCount; ++i)
15360 while (ShuffleLanes.
back() < NumElements) {
15364 if (Parts.
size() == 4) {
15366 Intrinsic::aarch64_neon_tbl4, VecTy);
15372 for (
int i = 0; i < ShuffleCount; ++i)
15373 ShuffleLanes[i] += ShuffleCount;
15377 "Lowering trunc for vectors requiring different TBL instructions is "
15381 if (!Parts.
empty()) {
15383 switch (Parts.
size()) {
15385 TblID = Intrinsic::aarch64_neon_tbl1;
15388 TblID = Intrinsic::aarch64_neon_tbl2;
15391 TblID = Intrinsic::aarch64_neon_tbl3;
15403 "more than 2 tbl instructions!");
15406 if (ElemsPerTbl < 16) {
15408 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
15413 if (ElemsPerTbl < 16) {
15414 std::iota(FinalMask.
begin(), FinalMask.
begin() + ElemsPerTbl, 0);
15415 std::iota(FinalMask.
begin() + ElemsPerTbl, FinalMask.
end(), 16);
15417 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
15439 if (!L || L->getHeader() !=
I->getParent() ||
F->hasMinSize() ||
15443 auto *SrcTy = dyn_cast<FixedVectorType>(
I->getOperand(0)->getType());
15444 auto *DstTy = dyn_cast<FixedVectorType>(
I->getType());
15445 if (!SrcTy || !DstTy)
15451 auto *ZExt = dyn_cast<ZExtInst>(
I);
15452 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
15453 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
15454 if (DstWidth % 8 != 0)
15457 auto *TruncDstType =
15461 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
15465 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
15468 DstTy = TruncDstType;
15474 auto *UIToFP = dyn_cast<UIToFPInst>(
I);
15475 if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
15476 DstTy->getElementType()->isFloatTy()) {
15478 auto *ZExt = cast<ZExtInst>(
15481 I->replaceAllUsesWith(UI);
15482 I->eraseFromParent();
15489 auto *FPToUI = dyn_cast<FPToUIInst>(
I);
15491 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
15492 SrcTy->getElementType()->isFloatTy() &&
15493 DstTy->getElementType()->isIntegerTy(8)) {
15495 auto *WideConv = Builder.
CreateFPToUI(FPToUI->getOperand(0),
15497 auto *TruncI = Builder.
CreateTrunc(WideConv, DstTy);
15498 I->replaceAllUsesWith(TruncI);
15499 I->eraseFromParent();
15508 auto *TI = dyn_cast<TruncInst>(
I);
15509 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
15510 ((SrcTy->getElementType()->isIntegerTy(32) ||
15511 SrcTy->getElementType()->isIntegerTy(64)) &&
15512 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
15521 Align &RequiredAligment)
const {
15526 RequiredAligment =
Align(1);
15528 return NumBits == 32 || NumBits == 64;
15535 unsigned VecSize = 128;
15540 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
15555 unsigned MinElts = EC.getKnownMinValue();
15557 UseScalable =
false;
15574 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
15577 if (EC.isScalable()) {
15578 UseScalable =
true;
15579 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
15582 unsigned VecSize =
DL.getTypeSizeInBits(VecTy);
15588 UseScalable =
true;
15594 return VecSize == 64 || VecSize % 128 == 0;
15626 bool Scalable,
Type *LDVTy,
15628 assert(Factor >= 2 && Factor <= 4 &&
"Invalid interleave factor");
15629 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
15630 Intrinsic::aarch64_sve_ld3_sret,
15631 Intrinsic::aarch64_sve_ld4_sret};
15632 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
15633 Intrinsic::aarch64_neon_ld3,
15634 Intrinsic::aarch64_neon_ld4};
15642 bool Scalable,
Type *STVTy,
15644 assert(Factor >= 2 && Factor <= 4 &&
"Invalid interleave factor");
15645 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
15646 Intrinsic::aarch64_sve_st3,
15647 Intrinsic::aarch64_sve_st4};
15648 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
15649 Intrinsic::aarch64_neon_st3,
15650 Intrinsic::aarch64_neon_st4};
15672 "Invalid interleave factor");
15673 assert(!Shuffles.
empty() &&
"Empty shufflevector input");
15675 "Unmatched number of shufflevectors and indices");
15685 if (!Subtarget->hasNEON() ||
15691 auto *FVTy = cast<FixedVectorType>(VTy);
15695 Type *EltTy = FVTy->getElementType();
15703 FVTy->getNumElements() / NumLoads);
15715 LDVTy->getElementCount());
15718 UseScalable, LDVTy, PtrTy);
15725 Value *PTrue =
nullptr;
15727 std::optional<unsigned> PgPattern =
15732 PgPattern = AArch64SVEPredPattern::all;
15736 PTrue = Builder.
CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
15740 for (
unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
15746 FVTy->getNumElements() * Factor);
15750 LdN = Builder.
CreateCall(LdNFunc, {PTrue, BaseAddr},
"ldN");
15752 LdN = Builder.
CreateCall(LdNFunc, BaseAddr,
"ldN");
15755 for (
unsigned i = 0; i < Shuffles.
size(); i++) {
15757 unsigned Index = Indices[i];
15770 FVTy->getNumElements()));
15772 SubVecs[SVI].push_back(SubVec);
15781 auto &SubVec = SubVecs[SVI];
15784 SVI->replaceAllUsesWith(WideVec);
15790template <
typename Iter>
15792 int MaxLookupDist = 20;
15793 unsigned IdxWidth =
DL.getIndexSizeInBits(0);
15794 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
15795 const Value *PtrA1 =
15796 Ptr->stripAndAccumulateInBoundsConstantOffsets(
DL, OffsetA);
15798 while (++It !=
End) {
15799 if (It->isDebugOrPseudoInst())
15801 if (MaxLookupDist-- == 0)
15803 if (
const auto *SI = dyn_cast<StoreInst>(&*It)) {
15804 const Value *PtrB1 =
15805 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
15807 if (PtrA1 == PtrB1 &&
15808 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.
sextOrTrunc(IdxWidth))
15845 unsigned Factor)
const {
15848 "Invalid interleave factor");
15850 auto *VecTy = cast<FixedVectorType>(SVI->
getType());
15851 assert(VecTy->getNumElements() % Factor == 0 &&
"Invalid interleaved store");
15853 unsigned LaneLen = VecTy->getNumElements() / Factor;
15854 Type *EltTy = VecTy->getElementType();
15857 const DataLayout &
DL = SI->getModule()->getDataLayout();
15863 if (!Subtarget->hasNEON() ||
15876 Type *IntTy =
DL.getIntPtrType(EltTy);
15877 unsigned NumOpElts =
15878 cast<FixedVectorType>(Op0->
getType())->getNumElements();
15890 LaneLen /= NumStores;
15897 Value *BaseAddr = SI->getPointerOperand();
15911 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
15919 Type *PtrTy = SI->getPointerOperandType();
15921 STVTy->getElementCount());
15924 UseScalable, STVTy, PtrTy);
15926 Value *PTrue =
nullptr;
15928 std::optional<unsigned> PgPattern =
15933 DL.getTypeSizeInBits(SubVecTy))
15934 PgPattern = AArch64SVEPredPattern::all;
15938 PTrue = Builder.
CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
15942 for (
unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
15947 for (
unsigned i = 0; i < Factor; i++) {
15949 unsigned IdxI = StoreCount * LaneLen * Factor + i;
15950 if (Mask[IdxI] >= 0) {
15954 unsigned StartMask = 0;
15955 for (
unsigned j = 1; j < LaneLen; j++) {
15956 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
15957 if (Mask[IdxJ] >= 0) {
15958 StartMask = Mask[IdxJ] - j;
15984 if (StoreCount > 0)
15986 BaseAddr, LaneLen * Factor);
15997 if (DI->
getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
16001 const unsigned Factor = 2;
16022 UseScalable, LdTy, PtrTy);
16026 Value *Pred =
nullptr;
16033 if (NumLoads > 1) {
16037 for (
unsigned I = 0;
I < NumLoads; ++
I) {
16041 Value *LdN =
nullptr;
16060 Result = Builder.
CreateCall(LdNFunc, {Pred, BaseAddr},
"ldN");
16062 Result = Builder.
CreateCall(LdNFunc, BaseAddr,
"ldN");
16072 if (II->
getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
16076 const unsigned Factor = 2;
16095 Type *PtrTy = SI->getPointerOperandType();
16097 UseScalable, StTy, PtrTy);
16101 Value *BaseAddr = SI->getPointerOperand();
16102 Value *Pred =
nullptr;
16111 for (
unsigned I = 0;
I < NumStores; ++
I) {
16113 if (NumStores > 1) {
16134 bool CanImplicitFloat = !FuncAttributes.
hasFnAttr(Attribute::NoImplicitFloat);
16135 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
16136 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
16140 bool IsSmallMemset =
Op.isMemset() &&
Op.size() < 32;
16141 auto AlignmentIsAcceptable = [&](
EVT VT,
Align AlignCheck) {
16142 if (
Op.isAligned(AlignCheck))
16150 if (CanUseNEON &&
Op.isMemset() && !IsSmallMemset &&
16151 AlignmentIsAcceptable(MVT::v16i8,
Align(16)))
16153 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128,
Align(16)))
16155 if (
Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64,
Align(8)))
16157 if (
Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32,
Align(4)))
16164 bool CanImplicitFloat = !FuncAttributes.
hasFnAttr(Attribute::NoImplicitFloat);
16165 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
16166 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
16170 bool IsSmallMemset =
Op.isMemset() &&
Op.size() < 32;
16171 auto AlignmentIsAcceptable = [&](
EVT VT,
Align AlignCheck) {
16172 if (
Op.isAligned(AlignCheck))
16180 if (CanUseNEON &&
Op.isMemset() && !IsSmallMemset &&
16181 AlignmentIsAcceptable(MVT::v2i64,
Align(16)))
16183 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128,
Align(16)))
16185 if (
Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64,
Align(8)))
16187 if (
Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32,
Align(4)))
16194 if (Immed == std::numeric_limits<int64_t>::min()) {
16196 <<
": avoid UB for INT64_MIN\n");
16200 Immed = std::abs(Immed);
16201 bool IsLegal = ((Immed >> 12) == 0 ||
16202 ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
16204 <<
" legal add imm: " << (IsLegal ?
"yes" :
"no") <<
"\n");
16230 if (
Insn.size() > 1)
16267 if (AM.
Scale == 1) {
16270 }
else if (AM.
Scale == 2) {
16283 if (isa<ScalableVectorType>(Ty)) {
16285 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
16297 uint64_t NumBits =
DL.getTypeSizeInBits(Ty);
16298 NumBytes = NumBits / 8;
16311 int64_t MaxOffset)
const {
16312 int64_t HighPart = MinOffset & ~0xfffULL;
16335 return Subtarget->hasFullFP16();
16368 static const MCPhysReg ScratchRegs[] = {
16369 AArch64::X16, AArch64::X17, AArch64::LR, 0
16371 return ScratchRegs;
16375 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
16384 "Expected shift op");
16386 SDValue ShiftLHS =
N->getOperand(0);
16387 EVT VT =
N->getValueType(0);
16393 isa<ConstantSDNode>(ShiftLHS.
getOperand(1))) {
16398 if (
auto *SRLC = dyn_cast<ConstantSDNode>(AndLHS.
getOperand(1))) {
16400 if (
auto *SHLC = dyn_cast<ConstantSDNode>(
N->getOperand(1)))
16401 return SRLC->getZExtValue() == SHLC->getZExtValue();
16413 (
N->getOperand(0).getOpcode() ==
ISD::SHL ||
16414 N->getOperand(0).getOpcode() ==
ISD::SRL) &&
16415 "Expected XOR(SHIFT) pattern");
16418 auto *XorC = dyn_cast<ConstantSDNode>(
N->getOperand(1));
16419 auto *ShiftC = dyn_cast<ConstantSDNode>(
N->getOperand(0).getOperand(1));
16420 if (XorC && ShiftC) {
16421 unsigned MaskIdx, MaskLen;
16422 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
16423 unsigned ShiftAmt = ShiftC->getZExtValue();
16424 unsigned BitWidth =
N->getValueType(0).getScalarSizeInBits();
16425 if (
N->getOperand(0).getOpcode() ==
ISD::SHL)
16426 return MaskIdx == ShiftAmt && MaskLen == (
BitWidth - ShiftAmt);
16427 return MaskIdx == 0 && MaskLen == (
BitWidth - ShiftAmt);
16437 N->getOperand(0).getOpcode() ==
ISD::SRL) ||
16439 N->getOperand(0).getOpcode() ==
ISD::SHL)) &&
16440 "Expected shift-shift mask");
16442 if (!
N->getOperand(0)->hasOneUse())
16446 EVT VT =
N->getValueType(0);
16447 if (
N->getOpcode() ==
ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
16448 auto *C1 = dyn_cast<ConstantSDNode>(
N->getOperand(0).getOperand(1));
16449 auto *C2 = dyn_cast<ConstantSDNode>(
N->getOperand(1));
16450 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
16457 unsigned BinOpcode,
EVT VT)
const {
16469 int64_t Val = Imm.getSExtValue();
16473 if ((int64_t)Val < 0)
16476 Val &= (1LL << 32) - 1;
16484 unsigned Index)
const {
16497 EVT VT =
N->getValueType(0);
16498 if (!Subtarget->hasNEON() || !VT.
isVector())
16510 auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.
getOperand(1));
16512 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.
getSizeInBits() - 1)
16536 if (
N->getValueType(0) != MVT::i32)
16539 SDValue VecReduceOp0 =
N->getOperand(0);
16540 unsigned Opcode = VecReduceOp0.
getOpcode();
16547 if (ABS->getOperand(0)->getOpcode() !=
ISD::SUB ||
16548 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
16551 SDValue SUB = ABS->getOperand(0);
16552 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
16553 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
16555 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
16556 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
16560 bool IsZExt =
false;
16568 SDValue EXT0 = SUB->getOperand(0);
16569 SDValue EXT1 = SUB->getOperand(1);
16586 UABDHigh8Op0, UABDHigh8Op1);
16597 UABDLo8Op0, UABDLo8Op1);
16616 if (!ST->hasDotProd())
16630 if (
A.getOpcode() !=
B.getOpcode() ||
16631 A.getOperand(0).getValueType() !=
B.getOperand(0).getValueType())
16633 ExtOpcode =
A.getOpcode();
16638 EVT Op0VT =
A.getOperand(0).getValueType();
16641 if (!IsValidElementCount || !IsValidSize)
16650 B =
B.getOperand(0);
16653 unsigned NumOfVecReduce;
16655 if (IsMultipleOf16) {
16657 TargetType = MVT::v4i32;
16660 TargetType = MVT::v2i32;
16665 if (NumOfVecReduce == 1) {
16668 A.getOperand(0),
B);
16675 for (;
I < VecReduce16Num;
I += 1) {
16694 if (VecReduce8Num == 0)
16695 return VecReduceAdd16;
16718 auto DetectAddExtract = [&](
SDValue A) {
16722 EVT VT =
A.getValueType();
16750 if (
SDValue R = DetectAddExtract(
A))
16753 if (
A.getOperand(0).getOpcode() ==
ISD::ADD &&
A.getOperand(0).hasOneUse())
16757 if (
A.getOperand(1).getOpcode() ==
ISD::ADD &&
A.getOperand(1).hasOneUse())
16770 EVT VT =
A.getValueType();
16771 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
16782 if (ExtVT0 != ExtVT1 ||
16825AArch64TargetLowering::BuildSDIVPow2(
SDNode *
N,
const APInt &Divisor,
16832 EVT VT =
N->getValueType(0);
16840 if ((VT != MVT::i32 && VT != MVT::i64) ||
16848AArch64TargetLowering::BuildSREMPow2(
SDNode *
N,
const APInt &Divisor,
16855 EVT VT =
N->getValueType(0);
16863 if ((VT != MVT::i32 && VT != MVT::i64) ||
16905 case Intrinsic::aarch64_sve_cntb:
16907 case Intrinsic::aarch64_sve_cnth:
16909 case Intrinsic::aarch64_sve_cntw:
16911 case Intrinsic::aarch64_sve_cntd:
16939 return TypeNode->
getVT();
16949 if (Mask == UCHAR_MAX)
16951 else if (Mask == USHRT_MAX)
16953 else if (Mask == UINT_MAX)
16975 unsigned ExtendOpcode = Extend.
getOpcode();
16990 if (PreExtendType == MVT::Other ||
16998 unsigned Opc =
Op.getOpcode();
17009 EVT PreExtendLegalType =
17015 PreExtendLegalType));
17023 cast<ShuffleVectorSDNode>(BV)->getMask());
17032 EVT VT =
Mul->getValueType(0);
17033 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
17044 return DAG.
getNode(
Mul->getOpcode(),
DL, VT, Op0 ? Op0 :
Mul->getOperand(0),
17045 Op1 ? Op1 :
Mul->getOperand(1));
17051 EVT VT =
N->getValueType(0);
17052 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
17053 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
17055 if (
N->getOperand(0).getOpcode() !=
ISD::AND ||
17056 N->getOperand(0).getOperand(0).getOpcode() !=
ISD::SRL)
17069 if (!V1.
isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
17070 V3 != (HalfSize - 1))
17099 EVT VT =
N->getValueType(0);
17103 unsigned AddSubOpc;
17105 auto IsAddSubWith1 = [&](
SDValue V) ->
bool {
17106 AddSubOpc = V->getOpcode();
17108 SDValue Opnd = V->getOperand(1);
17109 MulOper = V->getOperand(0);
17112 if (
auto C = dyn_cast<ConstantSDNode>(Opnd))
17118 if (IsAddSubWith1(N0)) {
17120 return DAG.
getNode(AddSubOpc,
DL, VT, N1, MulVal);
17123 if (IsAddSubWith1(N1)) {
17125 return DAG.
getNode(AddSubOpc,
DL, VT, N0, MulVal);
17129 if (!isa<ConstantSDNode>(N1))
17133 const APInt &ConstValue =
C->getAPIntValue();
17140 if (ConstValue.
sge(1) && ConstValue.
sle(16))
17155 unsigned TrailingZeroes = ConstValue.
countr_zero();
17156 if (TrailingZeroes) {
17164 if (
N->hasOneUse() && (
N->use_begin()->getOpcode() ==
ISD::ADD ||
17165 N->use_begin()->getOpcode() ==
ISD::SUB))
17170 APInt ShiftedConstValue = ConstValue.
ashr(TrailingZeroes);
17173 auto Shl = [&](
SDValue N0,
unsigned N1) {
17193 for (
unsigned i = 1; i <
BitWidth / 2; i++) {
17212 APInt SCVMinus1 = ShiftedConstValue - 1;
17213 APInt SCVPlus1 = ShiftedConstValue + 1;
17214 APInt CVPlus1 = ConstValue + 1;
17218 return Shl(
Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
17221 return Sub(Shl(N0, ShiftAmt), N0);
17223 ShiftAmt = SCVPlus1.
logBase2() + TrailingZeroes;
17224 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
17225 }
else if (Subtarget->hasALULSLFast() &&
17226 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
17227 APInt CVMMinus1 = CVM - 1;
17228 APInt CVNMinus1 = CVN - 1;
17229 unsigned ShiftM1 = CVMMinus1.
logBase2();
17230 unsigned ShiftN1 = CVNMinus1.
logBase2();
17232 if (ShiftM1 <= 3 && ShiftN1 <= 3) {
17234 return Add(Shl(MVal, ShiftN1), MVal);
17241 APInt SCVPlus1 = -ShiftedConstValue + 1;
17242 APInt CVNegPlus1 = -ConstValue + 1;
17243 APInt CVNegMinus1 = -ConstValue - 1;
17246 return Sub(N0, Shl(N0, ShiftAmt));
17248 ShiftAmt = CVNegMinus1.
logBase2();
17249 return Negate(
Add(Shl(N0, ShiftAmt), N0));
17251 ShiftAmt = SCVPlus1.
logBase2() + TrailingZeroes;
17252 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
17272 EVT VT =
N->getValueType(0);
17274 N->getOperand(0)->getOperand(0)->getOpcode() !=
ISD::SETCC ||
17275 VT.
getSizeInBits() !=
N->getOperand(0)->getValueType(0).getSizeInBits())
17283 dyn_cast<BuildVectorSDNode>(
N->getOperand(0)->getOperand(1))) {
17285 if (!BV->isConstant())
17290 EVT IntVT = BV->getValueType(0);
17297 N->getOperand(0)->getOperand(0), MaskConst);
17312 EVT VT =
N->getValueType(0);
17313 if (VT != MVT::f32 && VT != MVT::f64)
17317 if (VT.
getSizeInBits() !=
N->getOperand(0).getValueSizeInBits())
17327 !cast<LoadSDNode>(N0)->isVolatile()) {
17353 if (!
N->getValueType(0).isSimple())
17357 if (!
Op.getValueType().isSimple() ||
Op.getOpcode() !=
ISD::FMUL)
17360 if (!
Op.getValueType().is64BitVector() && !
Op.getValueType().is128BitVector())
17364 if (!isa<BuildVectorSDNode>(ConstVec))
17367 MVT FloatTy =
Op.getSimpleValueType().getVectorElementType();
17369 if (FloatBits != 32 && FloatBits != 64 &&
17370 (FloatBits != 16 || !Subtarget->hasFullFP16()))
17373 MVT IntTy =
N->getSimpleValueType(0).getVectorElementType();
17375 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
17379 if (IntBits > FloatBits)
17384 int32_t Bits = IntBits == 64 ? 64 : 32;
17386 if (
C == -1 ||
C == 0 ||
C > Bits)
17389 EVT ResTy =
Op.getValueType().changeVectorElementTypeToInteger();
17395 EVT SatVT = cast<VTSDNode>(
N->getOperand(1))->getVT();
17403 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
17404 : Intrinsic::aarch64_neon_vcvtfp2fxu;
17410 if (IntBits < FloatBits)
17421 if (!Subtarget->hasNEON())
17425 unsigned Opc =
Op->getOpcode();
17426 if (!
Op.getValueType().isVector() || !
Op.getValueType().isSimple() ||
17427 !
Op.getOperand(0).getValueType().isSimple() ||
17431 SDValue ConstVec =
N->getOperand(1);
17432 if (!isa<BuildVectorSDNode>(ConstVec))
17435 MVT IntTy =
Op.getOperand(0).getSimpleValueType().getVectorElementType();
17437 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
17440 MVT FloatTy =
N->getSimpleValueType(0).getVectorElementType();
17442 if (FloatBits != 32 && FloatBits != 64)
17446 if (IntBits > FloatBits)
17452 if (
C == -1 ||
C == 0 ||
C > FloatBits)
17456 unsigned NumLanes =
Op.getValueType().getVectorNumElements();
17457 switch (NumLanes) {
17461 ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
17464 ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
17474 if (IntBits < FloatBits)
17478 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
17479 : Intrinsic::aarch64_neon_vcvtfxu2fp;
17487 EVT VT =
N->getValueType(0);
17513 for (
int i = 1; i >= 0; --i) {
17514 for (
int j = 1; j >= 0; --j) {
17551 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
17552 for (
int i = 1; i >= 0; --i)
17553 for (
int j = 1; j >= 0; --j) {
17556 if (!BVN0 || !BVN1)
17559 bool FoundMatch =
true;
17563 if (!CN0 || !CN1 ||
17565 FoundMatch =
false;
17589 EVT VT =
N->getValueType(0);
17637 auto *Op1 = dyn_cast<ConstantSDNode>(Cmp1.
getOperand(1));
17638 if (Op1 && Op1->getAPIntValue().isNegative() &&
17639 Op1->getAPIntValue().sgt(-32)) {
17646 NZCVOp, Condition, Cmp0);
17649 Cmp1.
getOperand(1), NZCVOp, Condition, Cmp0);
17660 EVT VT =
N->getValueType(0);
17681 MaskForTy = 0xffull;
17684 MaskForTy = 0xffffull;
17687 MaskForTy = 0xffffffffull;
17695 if (
auto *Op0 = dyn_cast<ConstantSDNode>(
N->getOperand(0)))
17696 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
17706 Op =
Op->getOperand(0);
17719 unsigned Opc = Src->getOpcode();
17723 SDValue UnpkOp = Src->getOperand(0);
17736 auto MaskAndTypeMatch = [ExtVal](
EVT VT) ->
bool {
17737 return ((ExtVal == 0xFF && VT == MVT::i8) ||
17738 (ExtVal == 0xFFFF && VT == MVT::i16) ||
17739 (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
17745 if (MaskAndTypeMatch(EltTy))
17750 auto MaskedLoadOp = dyn_cast<MaskedLoadSDNode>(UnpkOp);
17751 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() ==
ISD::ZEXTLOAD ||
17754 if (MaskAndTypeMatch(EltTy))
17775 return N->getOperand(1);
17777 return N->getOperand(0);
17784 if (!Src.hasOneUse())
17795 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
17812 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
17832 EVT VT =
N->getValueType(0);
17838 for (
auto U :
N->uses())
17869 EVT VT =
N->getValueType(0);
17909 DefBits = ~(DefBits | ZeroSplat);
17916 UndefBits = ~(UndefBits | ZeroSplat);
17918 UndefBits, &
LHS)) ||
17932 EVT VT =
N->getValueType(0);
17935 if (!
N->getFlags().hasAllowReassociation())
17942 unsigned Opc =
A.getConstantOperandVal(0);
17943 if (Opc != Intrinsic::aarch64_neon_vcmla_rot0 &&
17944 Opc != Intrinsic::aarch64_neon_vcmla_rot90 &&
17945 Opc != Intrinsic::aarch64_neon_vcmla_rot180 &&
17946 Opc != Intrinsic::aarch64_neon_vcmla_rot270)
17951 A.getOperand(2),
A.getOperand(3));
17967 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
17969 return VT == MVT::i64;
17981 (
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
17982 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
17983 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
17984 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
17985 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
17986 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
17987 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
17988 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
17990 N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
18053 if (VS.getConstantOperandVal(0) != NumEls)
18072 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
18074 EVT VT =
N->getValueType(0);
18103 Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
18107 if (Shuffle && Shuffle->
getMaskElt(0) == 1 &&
18122 {N0->getOperand(0), Extract1, Extract2});
18136 EVT VT =
N->getValueType(0);
18137 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
18161 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
18163 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
18165 for (
size_t i = 0; i < Mask.size(); ++i)
18175 if (
N->getOperand(0).getValueType() == MVT::v4i8) {
18179 if (
N->getNumOperands() % 2 == 0 &&
all_of(
N->op_values(), [](
SDValue V) {
18180 if (V.getValueType() != MVT::v4i8)
18184 LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
18185 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
18186 LD->getExtensionType() == ISD::NON_EXTLOAD;
18192 for (
unsigned i = 0; i <
N->getNumOperands(); i++) {
18199 DAG.
getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(),
18200 LD->getMemOperand());
18221 auto isBitwiseVectorNegate = [](
SDValue V) {
18222 return V->getOpcode() ==
ISD::XOR &&
18256 if (
N->getNumOperands() == 2 && N0Opc == N1Opc &&
18276 if (N00Source == N10Source && N01Source == N11Source &&
18285 if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
18287 return DAG.
getNode(N0Opc, dl, VT, N00Source, N01Source);
18292 auto IsRSHRN = [](
SDValue Shr) {
18296 EVT VT =
Op.getValueType();
18297 unsigned ShtAmt = Shr.getConstantOperandVal(1);
18304 Op.getOperand(1).getConstantOperandVal(0)
18305 <<
Op.getOperand(1).getConstantOperandVal(1));
18307 isa<ConstantSDNode>(
Op.getOperand(1).getOperand(0)))
18309 Op.getOperand(1).getConstantOperandVal(0));
18313 if (Imm != 1ULL << (ShtAmt - 1))
18319 if (
N->getNumOperands() == 2 && IsRSHRN(N0) &&
18327 X.getValueType().getDoubleNumVectorElementsVT(*DCI.
DAG.
getContext());
18369 MVT RHSTy =
RHS.getValueType().getSimpleVT();
18375 dbgs() <<
"aarch64-lower: concat_vectors bitcast simplification\n");
18391 EVT VT =
N->getValueType(0);
18402 if (isa<ConstantSDNode>(V.getOperand(0)))
18413 SDValue SubVec =
N->getOperand(1);
18414 uint64_t IdxVal =
N->getConstantOperandVal(2);
18425 if (IdxVal == 0 && Vec.
isUndef())
18431 (IdxVal != 0 && IdxVal != NumSubElts))
18476 EVT ResTy =
N->getValueType(0);
18487 VecResTy = MVT::v4f32;
18489 VecResTy = MVT::v2f64;
18514 MVT VT =
N.getSimpleValueType();
18516 N.getConstantOperandVal(1) == 0)
18517 N =
N.getOperand(0);
18519 switch (
N.getOpcode()) {
18544 if (
N.getValueType().is64BitVector()) {
18556 N =
N.getOperand(0);
18559 if (
N.getOperand(0).getValueType().isScalableVector())
18561 return N.getConstantOperandAPInt(1) ==
18562 N.getOperand(0).getValueType().getVectorNumElements() / 2;
18627 if (!TValue || !FValue)
18631 if (!TValue->
isOne()) {
18678 if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
18695 EVT VT =
Op->getValueType(0);
18702 EVT VT =
N->getValueType(0);
18713 auto *LHSN1 = dyn_cast<ConstantSDNode>(
LHS->getOperand(1));
18714 auto *RHSN1 = dyn_cast<ConstantSDNode>(
RHS->getOperand(1));
18715 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
18741 EVT VT =
N->getValueType(0);
18768 if (!CTVal || !CFVal)
18803 "Unexpected constant value");
18814 EVT VT =
N->getValueType(0);
18821 auto isZeroDot = [](
SDValue Dot) {
18826 if (!isZeroDot(Dot))
18828 if (!isZeroDot(Dot))
18841 EVT VT =
Op.getValueType();
18895 MVT VT =
N->getSimpleValueType(0);
18907 LHS.getOpcode() !=
RHS.getOpcode())
18910 unsigned ExtType =
LHS.getOpcode();
18916 if (!
RHS.getNode())
18922 if (!
LHS.getNode())
18933 !
Op.getNode()->hasAnyUseOfValue(0);
18940 return std::nullopt;
18943 return std::nullopt;
18949 return getInvertedCondCode(
CC);
18951 return std::nullopt;
18975 Op->getOperand(0),
Op->getOperand(1),
18988 EVT VT =
N->getValueType(0);
19001 EVT VT =
N->getValueType(0);
19007 N->getOperand(0).getOperand(0).getValueType() !=
19008 N->getOperand(1).getOperand(0).getValueType())
19011 SDValue N0 =
N->getOperand(0).getOperand(0);
19012 SDValue N1 =
N->getOperand(1).getOperand(0);
19017 if ((S2 == MVT::i32 && S1 == MVT::i8) ||
19018 (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
19035 EVT VT =
N->getValueType(0);
19046 if (VT != MVT::v2i32)
19049 SDValue Elt0 =
N->getOperand(0), Elt1 =
N->getOperand(1);
19055 isa<ConstantSDNode>(Elt1->getOperand(1)) &&
19080 EVT VT =
N->getValueType(0);
19096 unsigned Opcode =
N.getOpcode();
19100 SrcVT = cast<VTSDNode>(
N.getOperand(1))->getVT();
19102 SrcVT =
N.getOperand(0).getValueType();
19104 return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
19110 return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
19112 return isa<ConstantSDNode>(
N.getOperand(1));
19122 auto IsOneUseExtend = [](
SDValue N) {
19130 if (isa<ConstantSDNode>(Z) || IsOneUseExtend(Z))
19133 if (SUB.getOpcode() !=
ISD::SUB || !SUB.hasOneUse())
19136 SDValue Shift = SUB.getOperand(0);
19137 if (!IsOneUseExtend(Shift))
19141 EVT VT =
N->getValueType(0);
19157 EVT VT =
N->getValueType(0);
19158 if (VT != MVT::i32 && VT != MVT::i64)
19198 if (!
Add.hasOneUse())
19212 EVT VT =
N->getValueType(0);
19237 if (!
N->getValueType(0).isFixedLengthVector())
19244 if (!cast<ConstantSDNode>(Op1->
getOperand(1))->isZero())
19260 DAG.
getNode(
N->getOpcode(),
SDLoc(
N), ScalableVT, {ScaledOp, MulValue});
19264 if (
SDValue res = performOpt(
N->getOperand(0),
N->getOperand(1)))
19267 return performOpt(
N->getOperand(1),
N->getOperand(0));
19275 EVT VT =
N->getValueType(0);
19276 if (VT != MVT::i64)
19303 DAG.
getNode(
N->getOpcode(),
DL, MVT::v1i64, Op0, Op1),
19311 if (
auto *Ld = dyn_cast<LoadSDNode>(BV)) {
19312 if (!Ld || !Ld->isSimple())
19343 B.getOperand(1).getNumOperands() != 4)
19345 auto SV1 = cast<ShuffleVectorSDNode>(
B);
19346 auto SV2 = cast<ShuffleVectorSDNode>(
B.getOperand(0));
19347 int NumElts =
B.getValueType().getVectorNumElements();
19348 int NumSubElts = NumElts / 4;
19349 for (
int I = 0;
I < NumSubElts;
I++) {
19351 if (SV1->getMaskElt(
I) !=
I ||
19352 SV1->getMaskElt(
I + NumSubElts) !=
I + NumSubElts ||
19353 SV1->getMaskElt(
I + NumSubElts * 2) !=
I + NumSubElts * 2 ||
19354 SV1->getMaskElt(
I + NumSubElts * 3) !=
I + NumElts)
19357 if (SV2->getMaskElt(
I) !=
I ||
19358 SV2->getMaskElt(
I + NumSubElts) !=
I + NumSubElts ||
19359 SV2->getMaskElt(
I + NumSubElts * 2) !=
I + NumElts)
19362 auto *Ld0 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(0));
19363 auto *Ld1 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(1));
19364 auto *Ld2 = dyn_cast<LoadSDNode>(SV2->getOperand(1).getOperand(0));
19365 auto *Ld3 = dyn_cast<LoadSDNode>(
B.getOperand(1).getOperand(0));
19366 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
19367 !Ld2->isSimple() || !Ld3->isSimple())
19380 unsigned &NumSubLoads) {
19387 if (NumSubLoads && Loads0.
size() != NumSubLoads)
19389 NumSubLoads = Loads0.
size();
19390 return Loads0.
size() == Loads1.
size() &&
19391 all_of(
zip(Loads0, Loads1), [&DAG](
auto L) {
19392 unsigned Size = get<0>(L)->getValueType(0).getSizeInBits();
19393 return Size == get<1>(L)->getValueType(0).getSizeInBits() &&
19406 DAG, NumSubLoads) &&
19434 EVT VT =
N->getValueType(0);
19452 Other.getOperand(0).getValueType() ||
19459 unsigned NumSubLoads = 0;
19468 unsigned NumSubElts = NumElts / NumSubLoads;
19490 for (
const auto &[L0, L1] :
zip(Loads0, Loads1)) {
19492 L0->getBasePtr(), L0->getPointerInfo(),
19493 L0->getOriginalAlign());
19503 Ops.
push_back(GenCombinedTree(O0, O1, DAG));
19506 SDValue NewOp = GenCombinedTree(Op0, Op1, DAG);
19509 int Hi = NumSubElts,
Lo = 0;
19510 for (
unsigned i = 0; i < NumSubLoads; i++) {
19511 for (
unsigned j = 0; j < NumSubElts; j++) {
19512 LowMask[i * NumSubElts + j] =
Lo++;
19513 HighMask[i * NumSubElts + j] =
Hi++;
19548 return DAG.
getNode(
N->getOpcode(),
DL, VT, Ext0, NShift);
19594 assert(
LHS.getValueType().is64BitVector() &&
19595 RHS.getValueType().is64BitVector() &&
19596 "unexpected shape for long operation");
19603 if (!
RHS.getNode())
19607 if (!
LHS.getNode())
19620 MVT ElemTy =
N->getSimpleValueType(0).getScalarType();
19623 int64_t ShiftAmount;
19625 APInt SplatValue, SplatUndef;
19626 unsigned SplatBitSize;
19629 HasAnyUndefs, ElemBits) ||
19630 SplatBitSize != ElemBits)
19634 }
else if (
ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(
N->getOperand(2))) {
19635 ShiftAmount = CVN->getSExtValue();
19640 if (ShiftAmount == 0 && IID != Intrinsic::aarch64_neon_sqshlu)
19641 return N->getOperand(1);
19648 case Intrinsic::aarch64_neon_sqshl:
19650 IsRightShift =
false;
19652 case Intrinsic::aarch64_neon_uqshl:
19654 IsRightShift =
false;
19656 case Intrinsic::aarch64_neon_srshl:
19658 IsRightShift =
true;
19660 case Intrinsic::aarch64_neon_urshl:
19662 IsRightShift =
true;
19664 case Intrinsic::aarch64_neon_sqshlu:
19666 IsRightShift =
false;
19668 case Intrinsic::aarch64_neon_sshl:
19669 case Intrinsic::aarch64_neon_ushl:
19673 if (ShiftAmount < 0) {
19676 ShiftAmount = -ShiftAmount;
19679 IsRightShift =
false;
19683 EVT VT =
N->getValueType(0);
19686 if (VT == MVT::i64) {
19691 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(
int)ElemBits) {
19694 if (
N->getValueType(0) == MVT::i64)
19698 }
else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
19701 if (
N->getValueType(0) == MVT::i64)
19723 N->getOperand(0),
N->getOperand(1), AndN.
getOperand(0));
19731 N->getOperand(1).getSimpleValueType(),
19741 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
19742 ScalarTy = MVT::i32;
19754 SDValue Scalar =
N->getOperand(3);
19755 EVT ScalarTy = Scalar.getValueType();
19757 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
19760 SDValue Passthru =
N->getOperand(1);
19763 Pred, Scalar, Passthru);
19769 EVT VT =
N->getValueType(0);
19798 SDValue Comparator =
N->getOperand(3);
19802 EVT VT =
N->getValueType(0);
19803 EVT CmpVT =
N->getOperand(2).getValueType();
19814 case Intrinsic::aarch64_sve_cmpeq_wide:
19815 case Intrinsic::aarch64_sve_cmpne_wide:
19816 case Intrinsic::aarch64_sve_cmpge_wide:
19817 case Intrinsic::aarch64_sve_cmpgt_wide:
19818 case Intrinsic::aarch64_sve_cmplt_wide:
19819 case Intrinsic::aarch64_sve_cmple_wide: {
19820 if (
auto *CN = dyn_cast<ConstantSDNode>(Comparator.
getOperand(0))) {
19821 int64_t ImmVal = CN->getSExtValue();
19822 if (ImmVal >= -16 && ImmVal <= 15)
19830 case Intrinsic::aarch64_sve_cmphs_wide:
19831 case Intrinsic::aarch64_sve_cmphi_wide:
19832 case Intrinsic::aarch64_sve_cmplo_wide:
19833 case Intrinsic::aarch64_sve_cmpls_wide: {
19834 if (
auto *CN = dyn_cast<ConstantSDNode>(Comparator.
getOperand(0))) {
19835 uint64_t ImmVal = CN->getZExtValue();
19861 assert(
Op.getValueType().isScalableVector() &&
19863 "Expected legal scalable vector type!");
19865 "Expected same type for PTEST operands");
19873 if (
Op.getValueType() != MVT::nxv16i1) {
19885 DL, MVT::Other, Pg,
Op);
19899 SDValue VecToReduce =
N->getOperand(2);
19918 SDValue VecToReduce =
N->getOperand(2);
19935 SDValue InitVal =
N->getOperand(2);
19936 SDValue VecToReduce =
N->getOperand(3);
19943 DAG.
getUNDEF(ReduceVT), InitVal, Zero);
19945 SDValue Reduce = DAG.
getNode(Opc,
DL, ReduceVT, Pred, InitVal, VecToReduce);
19958 bool SwapOperands =
false) {
19960 assert(
N->getNumOperands() == 4 &&
"Expected 3 operand intrinsic!");
19962 SDValue Op1 =
N->getOperand(SwapOperands ? 3 : 2);
19963 SDValue Op2 =
N->getOperand(SwapOperands ? 2 : 3);
19968 return DAG.
getNode(Opc,
SDLoc(
N),
N->getValueType(0), Op1, Op2);
19970 return DAG.
getNode(Opc,
SDLoc(
N),
N->getValueType(0), Pg, Op1, Op2);
19985 case Intrinsic::get_active_lane_mask: {
19987 EVT VT =
N->getValueType(0);
20010 N->getOperand(1),
N->getOperand(2));
20018 case Intrinsic::aarch64_neon_vcvtfxs2fp:
20019 case Intrinsic::aarch64_neon_vcvtfxu2fp:
20021 case Intrinsic::aarch64_neon_saddv:
20023 case Intrinsic::aarch64_neon_uaddv:
20025 case Intrinsic::aarch64_neon_sminv:
20027 case Intrinsic::aarch64_neon_uminv:
20029 case Intrinsic::aarch64_neon_smaxv:
20031 case Intrinsic::aarch64_neon_umaxv:
20033 case Intrinsic::aarch64_neon_fmax:
20035 N->getOperand(1),
N->getOperand(2));
20036 case Intrinsic::aarch64_neon_fmin:
20038 N->getOperand(1),
N->getOperand(2));
20039 case Intrinsic::aarch64_neon_fmaxnm:
20041 N->getOperand(1),
N->getOperand(2));
20042 case Intrinsic::aarch64_neon_fminnm:
20044 N->getOperand(1),
N->getOperand(2));
20045 case Intrinsic::aarch64_neon_smull:
20047 N->getOperand(1),
N->getOperand(2));
20048 case Intrinsic::aarch64_neon_umull:
20050 N->getOperand(1),
N->getOperand(2));
20051 case Intrinsic::aarch64_neon_pmull:
20053 N->getOperand(1),
N->getOperand(2));
20054 case Intrinsic::aarch64_neon_sqdmull:
20056 case Intrinsic::aarch64_neon_sqshl:
20057 case Intrinsic::aarch64_neon_uqshl:
20058 case Intrinsic::aarch64_neon_sqshlu:
20059 case Intrinsic::aarch64_neon_srshl:
20060 case Intrinsic::aarch64_neon_urshl:
20061 case Intrinsic::aarch64_neon_sshl:
20062 case Intrinsic::aarch64_neon_ushl:
20064 case Intrinsic::aarch64_neon_sabd:
20066 N->getOperand(1),
N->getOperand(2));
20067 case Intrinsic::aarch64_neon_uabd:
20069 N->getOperand(1),
N->getOperand(2));
20070 case Intrinsic::aarch64_crc32b:
20071 case Intrinsic::aarch64_crc32cb:
20073 case Intrinsic::aarch64_crc32h:
20074 case Intrinsic::aarch64_crc32ch:
20076 case Intrinsic::aarch64_sve_saddv:
20078 if (
N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
20082 case Intrinsic::aarch64_sve_uaddv:
20084 case Intrinsic::aarch64_sve_smaxv:
20086 case Intrinsic::aarch64_sve_umaxv:
20088 case Intrinsic::aarch64_sve_sminv:
20090 case Intrinsic::aarch64_sve_uminv:
20092 case Intrinsic::aarch64_sve_orv:
20094 case Intrinsic::aarch64_sve_eorv:
20096 case Intrinsic::aarch64_sve_andv:
20098 case Intrinsic::aarch64_sve_index:
20100 case Intrinsic::aarch64_sve_dup:
20102 case Intrinsic::aarch64_sve_dup_x:
20105 case Intrinsic::aarch64_sve_ext:
20107 case Intrinsic::aarch64_sve_mul_u:
20109 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20110 case Intrinsic::aarch64_sve_smulh_u:
20112 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20113 case Intrinsic::aarch64_sve_umulh_u:
20115 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20116 case Intrinsic::aarch64_sve_smin_u:
20118 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20119 case Intrinsic::aarch64_sve_umin_u:
20121 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20122 case Intrinsic::aarch64_sve_smax_u:
20124 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20125 case Intrinsic::aarch64_sve_umax_u:
20127 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20128 case Intrinsic::aarch64_sve_lsl_u:
20130 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20131 case Intrinsic::aarch64_sve_lsr_u:
20133 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20134 case Intrinsic::aarch64_sve_asr_u:
20136 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20137 case Intrinsic::aarch64_sve_fadd_u:
20139 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20140 case Intrinsic::aarch64_sve_fdiv_u:
20142 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20143 case Intrinsic::aarch64_sve_fmax_u:
20145 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20146 case Intrinsic::aarch64_sve_fmaxnm_u:
20148 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20149 case Intrinsic::aarch64_sve_fmla_u:
20151 N->getOperand(1),
N->getOperand(3),
N->getOperand(4),
20153 case Intrinsic::aarch64_sve_fmin_u:
20155 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20156 case Intrinsic::aarch64_sve_fminnm_u:
20158 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20159 case Intrinsic::aarch64_sve_fmul_u:
20161 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20162 case Intrinsic::aarch64_sve_fsub_u:
20164 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20165 case Intrinsic::aarch64_sve_add_u:
20168 case Intrinsic::aarch64_sve_sub_u:
20171 case Intrinsic::aarch64_sve_subr:
20173 case Intrinsic::aarch64_sve_and_u:
20176 case Intrinsic::aarch64_sve_bic_u:
20178 N->getOperand(2),
N->getOperand(3));
20179 case Intrinsic::aarch64_sve_eor_u:
20182 case Intrinsic::aarch64_sve_orr_u:
20185 case Intrinsic::aarch64_sve_sabd_u:
20187 N->getOperand(2),
N->getOperand(3));
20188 case Intrinsic::aarch64_sve_uabd_u:
20190 N->getOperand(2),
N->getOperand(3));
20191 case Intrinsic::aarch64_sve_sdiv_u:
20193 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20194 case Intrinsic::aarch64_sve_udiv_u:
20196 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20197 case Intrinsic::aarch64_sve_sqadd:
20199 case Intrinsic::aarch64_sve_sqsub_u:
20201 N->getOperand(2),
N->getOperand(3));
20202 case Intrinsic::aarch64_sve_uqadd:
20204 case Intrinsic::aarch64_sve_uqsub_u:
20206 N->getOperand(2),
N->getOperand(3));
20207 case Intrinsic::aarch64_sve_sqadd_x:
20209 N->getOperand(1),
N->getOperand(2));
20210 case Intrinsic::aarch64_sve_sqsub_x:
20212 N->getOperand(1),
N->getOperand(2));
20213 case Intrinsic::aarch64_sve_uqadd_x:
20215 N->getOperand(1),
N->getOperand(2));
20216 case Intrinsic::aarch64_sve_uqsub_x:
20218 N->getOperand(1),
N->getOperand(2));
20219 case Intrinsic::aarch64_sve_asrd:
20221 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20222 case Intrinsic::aarch64_sve_cmphs:
20223 if (!
N->getOperand(2).getValueType().isFloatingPoint())
20225 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20228 case Intrinsic::aarch64_sve_cmphi:
20229 if (!
N->getOperand(2).getValueType().isFloatingPoint())
20231 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20234 case Intrinsic::aarch64_sve_fcmpge:
20235 case Intrinsic::aarch64_sve_cmpge:
20237 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20240 case Intrinsic::aarch64_sve_fcmpgt:
20241 case Intrinsic::aarch64_sve_cmpgt:
20243 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20246 case Intrinsic::aarch64_sve_fcmpeq:
20247 case Intrinsic::aarch64_sve_cmpeq:
20249 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20252 case Intrinsic::aarch64_sve_fcmpne:
20253 case Intrinsic::aarch64_sve_cmpne:
20255 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20258 case Intrinsic::aarch64_sve_fcmpuo:
20260 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20263 case Intrinsic::aarch64_sve_fadda:
20265 case Intrinsic::aarch64_sve_faddv:
20267 case Intrinsic::aarch64_sve_fmaxnmv:
20269 case Intrinsic::aarch64_sve_fmaxv:
20271 case Intrinsic::aarch64_sve_fminnmv:
20273 case Intrinsic::aarch64_sve_fminv:
20275 case Intrinsic::aarch64_sve_sel:
20277 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
20278 case Intrinsic::aarch64_sve_cmpeq_wide:
20280 case Intrinsic::aarch64_sve_cmpne_wide:
20282 case Intrinsic::aarch64_sve_cmpge_wide:
20284 case Intrinsic::aarch64_sve_cmpgt_wide:
20286 case Intrinsic::aarch64_sve_cmplt_wide:
20288 case Intrinsic::aarch64_sve_cmple_wide:
20290 case Intrinsic::aarch64_sve_cmphs_wide:
20292 case Intrinsic::aarch64_sve_cmphi_wide:
20294 case Intrinsic::aarch64_sve_cmplo_wide:
20296 case Intrinsic::aarch64_sve_cmpls_wide:
20298 case Intrinsic::aarch64_sve_ptest_any:
20299 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20301 case Intrinsic::aarch64_sve_ptest_first:
20302 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20304 case Intrinsic::aarch64_sve_ptest_last:
20305 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
20312 unsigned OC =
N->getOpcode();
20328 const SDValue SetCC =
N->getOperand(0);
20350 SDLoc(SetCC),
N->getValueType(0), Ext1, Ext2,
20365 (
N->getOperand(0).getOpcode() ==
ISD::ABDU ||
20366 N->getOperand(0).getOpcode() ==
ISD::ABDS)) {
20367 SDNode *ABDNode =
N->getOperand(0).getNode();
20376 if (
N->getValueType(0).isFixedLengthVector() &&
20385 SDValue SplatVal,
unsigned NumVecElts) {
20404 if (BasePtr->getOpcode() ==
ISD::ADD &&
20405 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
20406 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
20407 BasePtr = BasePtr->getOperand(0);
20410 unsigned Offset = EltOffset;
20411 while (--NumVecElts) {
20427 assert(ContentTy.
isSimple() &&
"No SVE containers for extended types");
20438 return MVT::nxv2i64;
20443 return MVT::nxv4i32;
20447 case MVT::nxv8bf16:
20448 return MVT::nxv8i16;
20450 return MVT::nxv16i8;
20456 EVT VT =
N->getValueType(0);
20461 EVT ContainerVT = VT;
20466 SDValue Ops[] = {
N->getOperand(0),
20474 if (ContainerVT.
isInteger() && (VT != ContainerVT))
20482 EVT VT =
N->getValueType(0);
20483 EVT PtrTy =
N->getOperand(3).getValueType();
20489 auto *MINode = cast<MemIntrinsicSDNode>(
N);
20492 MINode->getOperand(3), DAG.
getUNDEF(PtrTy),
20494 MINode->getMemoryVT(), MINode->getMemOperand(),
20505template <
unsigned Opcode>
20509 "Unsupported opcode.");
20511 EVT VT =
N->getValueType(0);
20517 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(2),
N->getOperand(3)};
20530 EVT DataVT =
Data.getValueType();
20538 if (
Data.getValueType().isFloatingPoint())
20543 SDValue Ops[] = {
N->getOperand(0),
20557 EVT DataVT =
Data.getValueType();
20558 EVT PtrTy =
N->getOperand(4).getValueType();
20563 auto *MINode = cast<MemIntrinsicSDNode>(
N);
20566 MINode->getMemoryVT(), MINode->getMemOperand(),
20596 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
20598 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
20620 if (Offset < -512 || Offset > 504)
20624 for (
int I = 0;
I < NumVecElts; ++
I) {
20636 ZeroReg = AArch64::WZR;
20639 ZeroReg = AArch64::XZR;
20663 if (NumVecElts != 4 && NumVecElts != 2)
20674 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
20676 for (
unsigned I = 0;
I < NumVecElts; ++
I) {
20692 if (IndexVal >= NumVecElts)
20694 IndexNotInserted.reset(IndexVal);
20699 if (IndexNotInserted.any())
20723 return ReplacedZeroSplat;
20729 if (!Subtarget->isMisaligned128StoreSlow())
20754 return ReplacedSplat;
20780 if (
N->getOperand(2).isUndef())
20781 return N->getOperand(1);
20790 "Unexpected Opcode!");
20793 if (
N->getOperand(0).isUndef())
20794 return DAG.
getUNDEF(
N->getValueType(0));
20799 if (
N->getOperand(0).getOpcode() ==
ISD::MLOAD &&
20810 unsigned PgPattern = Mask->getConstantOperandVal(0);
20811 EVT VT =
N->getValueType(0);
20853 if (VT == MVT::nxv8i16)
20854 ResVT = MVT::nxv16i8;
20855 else if (VT == MVT::nxv4i32)
20856 ResVT = MVT::nxv8i16;
20857 else if (VT == MVT::nxv2i64)
20858 ResVT = MVT::nxv4i32;
20866 unsigned ShiftValue = SrlOp1->getZExtValue();
20877 uint64_t AddValue = AddOp1->getZExtValue();
20878 if (AddValue != 1ULL << (ShiftValue - 1))
20893 EVT ResVT =
N->getValueType(0);
20897 EVT BCVT = MVT::Other, HalfVT = MVT::Other;
20903 HalfVT = MVT::v8i8;
20907 HalfVT = MVT::v4i16;
20911 HalfVT = MVT::v2i32;
20914 if (BCVT != MVT::Other) {
20949 if (!IsLittleEndian)
20952 if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8)
20956 const unsigned Opcode = Operand.
getOpcode();
20965 SDValue SourceOp0 = getSourceOp(Op0);
20966 SDValue SourceOp1 = getSourceOp(Op1);
20968 if (!SourceOp0 || !SourceOp1)
20979 ResultTy = MVT::v4i32;
20982 ResultTy = MVT::v8i16;
20985 ResultTy = MVT::v16i8;
20996 EVT BitcastResultTy;
21000 BitcastResultTy = MVT::v2i64;
21003 BitcastResultTy = MVT::v4i32;
21006 BitcastResultTy = MVT::v8i16;
21017 unsigned Opc =
N->getOpcode();
21023 "Invalid opcode.");
21041 EVT ResVT =
N->getValueType(0);
21043 const auto OffsetOpc =
Offset.getOpcode();
21044 const bool OffsetIsZExt =
21046 const bool OffsetIsSExt =
21050 if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
21052 VTSDNode *ExtFrom = cast<VTSDNode>(
Offset.getOperand(2).getNode());
21058 if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
21065 return DAG.
getNode(NewOpc,
DL, {ResVT, MVT::Other},
21066 {Chain, Pg,
Base, UnextendedOffset, Ty});
21082 unsigned OpScalarSize =
Op.getScalarValueSizeInBits();
21084 unsigned ShiftImm =
N->getConstantOperandVal(1);
21085 assert(OpScalarSize > ShiftImm &&
"Invalid shift imm");
21090 N->getOperand(1) ==
Op.getOperand(1))
21092 return Op.getOperand(0);
21095 APInt DemandedMask = ~ShiftedOutBits;
21108 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
21110 SDValue CC =
N->getOperand(0)->getOperand(0);
21111 auto VT =
CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.
getContext());
21129 EVT VT =
N->getValueType(0);
21134 unsigned LoadIdx = IsLaneOp ? 1 : 0;
21135 SDNode *LD =
N->getOperand(LoadIdx).getNode();
21143 Lane =
N->getOperand(2);
21144 auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
21159 if (UI.getUse().getResNo() == 1)
21168 if (
N->hasOneUse()) {
21169 unsigned UseOpc =
N->use_begin()->getOpcode();
21178 Addr.getNode()->use_end(); UI != UE; ++UI) {
21181 || UI.getUse().getResNo() !=
Addr.getResNo())
21187 uint32_t IncVal = CInc->getZExtValue();
21189 if (IncVal != NumBytes)
21215 EVT Tys[3] = { VT, MVT::i64, MVT::Other };
21255 "Expected STORE dag node in input!");
21257 if (
auto Store = dyn_cast<StoreSDNode>(
N)) {
21258 if (!Store->isTruncatingStore() || Store->isIndexed())
21260 SDValue Ext = Store->getValue();
21261 auto ExtOpCode = Ext.getOpcode();
21265 SDValue Orig = Ext->getOperand(0);
21269 Store->getBasePtr(), Store->getMemOperand());
21286 EVT MemVT = LD->getMemoryVT();
21287 if (LD->isVolatile() || !LD->isNonTemporal() || !Subtarget->
isLittleEndian())
21296 SDValue Chain = LD->getChain();
21297 SDValue BasePtr = LD->getBasePtr();
21309 for (
unsigned I = 0;
I < Num256Loads;
I++) {
21310 unsigned PtrOffset =
I * 32;
21315 NewVT,
DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset),
21316 NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo());
21326 unsigned PtrOffset = (MemVT.
getSizeInBits() - BitsRemaining) / 8;
21334 DAG.
getLoad(RemainingVT,
DL, Chain, NewPtr,
21335 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
21336 LD->getMemOperand()->getFlags(), LD->getAAInfo());
21339 SDValue ExtendedReminingLoad =
21341 {UndefVector, RemainingLoad, InsertIdx});
21342 LoadOps.push_back(ExtendedReminingLoad);
21359 EVT VecVT =
Op.getValueType();
21361 "Need boolean vector type.");
21368 return Op.getOperand(0).getValueType();
21372 for (
SDValue Operand :
Op->op_values()) {
21378 BaseVT = OperandVT;
21379 else if (OperandVT != BaseVT)
21397 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
21409 unsigned BitsPerElement = std::max(64 / NumElts, 8u);
21425 if (VecVT == MVT::v16i8) {
21429 for (
unsigned Half = 0; Half < 2; ++Half) {
21430 for (
unsigned MaskBit = 1; MaskBit <= 128; MaskBit *= 2) {
21438 SDValue UpperRepresentativeBits =
21442 RepresentativeBits, UpperRepresentativeBits);
21449 for (
unsigned MaskBit = 1; MaskBit <= MaxBitMask; MaskBit *= 2) {
21463 if (!Store->isTruncatingStore())
21467 SDValue VecOp = Store->getValue();
21469 EVT MemVT = Store->getMemoryVT();
21488 return DAG.
getStore(Store->getChain(),
DL, ExtendedBits, Store->getBasePtr(),
21489 Store->getMemOperand());
21493 return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv8i8) ||
21494 (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv4i16) ||
21495 (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv2i32);
21503 SDValue Chain = ST->getChain();
21506 EVT ValueVT =
Value.getValueType();
21508 auto hasValidElementTypeForFPTruncStore = [](
EVT VT) {
21510 return EltVT == MVT::f32 || EltVT == MVT::f64;
21522 hasValidElementTypeForFPTruncStore(
Value.getOperand(0).getValueType()))
21524 ST->getMemoryVT(), ST->getMemOperand());
21539 if (ST->isTruncatingStore()) {
21540 EVT StoreVT = ST->getMemoryVT();
21545 return DAG.
getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(),
21546 StoreVT, ST->getMemOperand());
21567 Value.getValueType().isInteger()) {
21572 EVT InVT =
Value.getOperand(0).getValueType();
21576 unsigned PgPattern = Mask->getConstantOperandVal(0);
21595 EVT ValueVT =
Value->getValueType(0);
21614 EVT IndexVT =
Index.getValueType();
21650 Add.getOperand(0), ShiftOp);
21666 bool Changed =
false;
21672 EVT IndexVT =
Index.getValueType();
21677 EVT DataVT =
N->getOperand(1).getValueType();
21690 int64_t Stride = 0;
21692 Stride = cast<ConstantSDNode>(
Index.getOperand(0))->getSExtValue();
21702 Stride = Step << Shift->getZExtValue();
21710 if (Stride < std::numeric_limits<int32_t>::min() ||
21711 Stride > std::numeric_limits<int32_t>::max())
21715 unsigned MaxVScale =
21717 int64_t LastElementOffset =
21720 if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
21721 LastElementOffset > std::numeric_limits<int32_t>::max())
21734 assert(MGS &&
"Can only combine gather load or scatter store nodes");
21752 if (
auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
21754 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr,
Index, Scale};
21759 auto *MSC = cast<MaskedScatterSDNode>(MGS);
21775 unsigned AddrOpIdx =
N->getNumOperands() - 1;
21780 UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
21783 UI.getUse().getResNo() !=
Addr.getResNo())
21798 bool IsStore =
false;
21799 bool IsLaneOp =
false;
21800 bool IsDupOp =
false;
21801 unsigned NewOpc = 0;
21802 unsigned NumVecs = 0;
21803 unsigned IntNo =
N->getConstantOperandVal(1);
21807 NumVecs = 2;
break;
21809 NumVecs = 3;
break;
21811 NumVecs = 4;
break;
21813 NumVecs = 2; IsStore =
true;
break;
21815 NumVecs = 3; IsStore =
true;
break;
21817 NumVecs = 4; IsStore =
true;
break;
21819 NumVecs = 2;
break;
21821 NumVecs = 3;
break;
21823 NumVecs = 4;
break;
21825 NumVecs = 2; IsStore =
true;
break;
21827 NumVecs = 3; IsStore =
true;
break;
21829 NumVecs = 4; IsStore =
true;
break;
21831 NumVecs = 2; IsDupOp =
true;
break;
21833 NumVecs = 3; IsDupOp =
true;
break;
21835 NumVecs = 4; IsDupOp =
true;
break;
21837 NumVecs = 2; IsLaneOp =
true;
break;
21839 NumVecs = 3; IsLaneOp =
true;
break;
21841 NumVecs = 4; IsLaneOp =
true;
break;
21843 NumVecs = 2; IsStore =
true; IsLaneOp =
true;
break;
21845 NumVecs = 3; IsStore =
true; IsLaneOp =
true;
break;
21847 NumVecs = 4; IsStore =
true; IsLaneOp =
true;
break;
21852 VecTy =
N->getOperand(2).getValueType();
21854 VecTy =
N->getValueType(0);
21859 uint32_t IncVal = CInc->getZExtValue();
21861 if (IsLaneOp || IsDupOp)
21863 if (IncVal != NumBytes)
21870 if (IsLaneOp || IsStore)
21871 for (
unsigned i = 2; i < AddrOpIdx; ++i)
21878 unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
21880 for (n = 0; n < NumResultVecs; ++n)
21882 Tys[n++] = MVT::i64;
21883 Tys[n] = MVT::Other;
21892 std::vector<SDValue> NewResults;
21893 for (
unsigned i = 0; i < NumResultVecs; ++i) {
21896 NewResults.push_back(
SDValue(UpdN.
getNode(), NumResultVecs + 1));
21910 switch(V.getNode()->getOpcode()) {
21914 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
21915 if ((LoadNode->
getMemoryVT() == MVT::i8 && width == 8)
21916 || (LoadNode->
getMemoryVT() == MVT::i16 && width == 16)) {
21923 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
21924 if ((TypeNode->
getVT() == MVT::i8 && width == 8)
21925 || (TypeNode->
getVT() == MVT::i16 && width == 16)) {
21932 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
21933 if ((TypeNode->
getVT() == MVT::i8 && width == 8)
21934 || (TypeNode->
getVT() == MVT::i16 && width == 16)) {
21942 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
21943 1LL << (width - 1);
22013 int CompConstant) {
22017 int MaxUInt = (1 << width);
22025 AddConstant -= (1 << (width-1));
22030 if ((AddConstant == 0) ||
22031 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
22032 (AddConstant >= 0 && CompConstant < 0) ||
22033 (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
22038 if ((AddConstant == 0) ||
22039 (AddConstant >= 0 && CompConstant <= 0) ||
22040 (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
22045 if ((AddConstant >= 0 && CompConstant < 0) ||
22046 (AddConstant <= 0 && CompConstant >= -1 &&
22047 CompConstant < AddConstant + MaxUInt))
22052 if ((AddConstant == 0) ||
22053 (AddConstant > 0 && CompConstant <= 0) ||
22054 (AddConstant < 0 && CompConstant <= AddConstant))
22059 if ((AddConstant >= 0 && CompConstant <= 0) ||
22060 (AddConstant <= 0 && CompConstant >= 0 &&
22061 CompConstant <= AddConstant + MaxUInt))
22066 if ((AddConstant > 0 && CompConstant < 0) ||
22067 (AddConstant < 0 && CompConstant >= 0 &&
22068 CompConstant < AddConstant + MaxUInt) ||
22069 (AddConstant >= 0 && CompConstant >= 0 &&
22070 CompConstant >= AddConstant) ||
22071 (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
22090 unsigned CCIndex,
unsigned CmpIndex,
22119 N->getOperand(CCIndex)->getValueType(0));
22127 assert((CCIndex == 2 && CmpIndex == 3) &&
22128 "Expected CCIndex to be 2 and CmpIndex to be 3.");
22129 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1), AArch64_CC,
22131 return DAG.
getNode(
N->getOpcode(),
N,
N->getVTList(), Ops);
22138 unsigned CmpIndex) {
22139 unsigned CC = cast<ConstantSDNode>(
N->getOperand(CCIndex))->getSExtValue();
22140 SDNode *SubsNode =
N->getOperand(CmpIndex).getNode();
22141 unsigned CondOpcode = SubsNode->
getOpcode();
22150 unsigned MaskBits = 0;
22160 uint32_t CNV = CN->getZExtValue();
22163 else if (CNV == 65535)
22184 if (!isa<ConstantSDNode>(AddInputValue2.
getNode()) ||
22185 !isa<ConstantSDNode>(SubsInputValue.
getNode()))
22196 cast<ConstantSDNode>(AddInputValue2.
getNode())->getSExtValue(),
22197 cast<ConstantSDNode>(SubsInputValue.
getNode())->getSExtValue()))
22230 assert(isa<ConstantSDNode>(CCVal) &&
"Expected a ConstantSDNode here!");
22235 unsigned CmpOpc = Cmp.getOpcode();
22241 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
22248 "Expected the value type to be the same for both operands!");
22249 if (
LHS.getValueType() != MVT::i32 &&
LHS.getValueType() != MVT::i64)
22276 unsigned CC =
N->getConstantOperandVal(2);
22281 Zero =
N->getOperand(0);
22282 CTTZ =
N->getOperand(1);
22284 Zero =
N->getOperand(1);
22285 CTTZ =
N->getOperand(0);
22291 CTTZ.getOperand(0).getOpcode() !=
ISD::CTTZ))
22294 assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
22295 "Illegal type in CTTZ folding");
22301 ? CTTZ.getOperand(0).getOperand(0)
22302 : CTTZ.getOperand(0);
22304 if (
X != SUBS.getOperand(0))
22308 ? CTTZ.getOperand(0).getValueSizeInBits()
22309 : CTTZ.getValueSizeInBits();
22343 if (!isa<ConstantSDNode>(
X) || !isa<ConstantSDNode>(
Y) ||
X ==
Y) {
22361 else if (CmpRHS !=
X)
22370 EVT VT =
Op->getValueType(0);
22381 if (
N->getOperand(0) ==
N->getOperand(1))
22382 return N->getOperand(0);
22399 EVT Op0MVT =
Op->getOperand(0).getValueType();
22405 SDNode *FirstUse = *
Op->use_begin();
22412 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
22427 Op->getOperand(0));
22429 Op->getOperand(0));
22430 if (Op0SExt && (isSignedIntSetCC(
CC) || isIntEqualitySetCC(
CC))) {
22431 Op0ExtV =
SDValue(Op0SExt, 0);
22433 }
else if (Op0ZExt && (isUnsignedIntSetCC(
CC) || isIntEqualitySetCC(
CC))) {
22434 Op0ExtV =
SDValue(Op0ZExt, 0);
22440 Op0ExtV, Op1ExtV,
Op->getOperand(2));
22467 EVT VT =
N->getValueType(0);
22480 auto NewCond = getInvertedCondCode(OldCond);
22486 LHS.getOperand(3));
22492 LHS->getOpcode() ==
ISD::SRL && isa<ConstantSDNode>(
LHS->getOperand(1)) &&
22495 EVT TstVT =
LHS->getValueType(0);
22498 uint64_t TstImm = -1ULL <<
LHS->getConstantOperandVal(1);
22513 EVT ToVT =
LHS->getValueType(0);
22514 EVT FromVT =
LHS->getOperand(0).getValueType();
22519 DL, MVT::i1,
LHS->getOperand(0));
22537 unsigned GenericOpcode) {
22541 EVT VT =
N->getValueType(0);
22544 if (!
N->hasAnyUseOfValue(1)) {
22599 "Unexpected opcode!");
22612 LHS->getOperand(0)->getValueType(0) ==
N->getValueType(0)) {
22617 LHS->getOperand(0)->getOperand(0) == Pred)
22618 return LHS->getOperand(0);
22624 return LHS->getOperand(0);
22633 LHS->getOperand(0), Pred);
22646 if (!
Op->hasOneUse())
22656 Bit < Op->getValueType(0).getSizeInBits()) {
22662 Bit < Op->getOperand(0).getValueSizeInBits()) {
22669 auto *
C = dyn_cast<ConstantSDNode>(
Op->getOperand(1));
22673 switch (
Op->getOpcode()) {
22679 if ((
C->getZExtValue() >> Bit) & 1)
22685 if (
C->getZExtValue() <= Bit &&
22686 (Bit -
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
22687 Bit = Bit -
C->getZExtValue();
22694 Bit = Bit +
C->getZExtValue();
22695 if (Bit >=
Op->getValueType(0).getSizeInBits())
22696 Bit =
Op->getValueType(0).getSizeInBits() - 1;
22701 if ((Bit +
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
22702 Bit = Bit +
C->getZExtValue();
22709 if ((
C->getZExtValue() >> Bit) & 1)
22719 unsigned Bit =
N->getConstantOperandVal(2);
22720 bool Invert =
false;
22721 SDValue TestSrc =
N->getOperand(1);
22724 if (TestSrc == NewTestSrc)
22727 unsigned NewOpc =
N->getOpcode();
22738 return DAG.
getNode(NewOpc,
DL, MVT::Other,
N->getOperand(0), NewTestSrc,
22748 auto SelectA =
N->getOperand(1);
22749 auto SelectB =
N->getOperand(2);
22750 auto NTy =
N->getValueType(0);
22752 if (!NTy.isScalableVector())
22758 switch (SelectB.getOpcode()) {
22766 if (SelectA != SelectB.getOperand(0))
22772 auto InverseSetCC =
22777 {InverseSetCC, SelectB, SelectA});
22793 return N->getOperand(1);
22796 return N->getOperand(2);
22807 SDNode *SplatLHS =
N->getOperand(1).getNode();
22808 SDNode *SplatRHS =
N->getOperand(2).getNode();
22810 if (CmpLHS.
getValueType() ==
N->getOperand(1).getValueType() &&
22813 MVT::v2i32, MVT::v4i32, MVT::v2i64}),
22837 EVT ResVT =
N->getValueType(0);
22843 SDValue IfTrue =
N->getOperand(1);
22844 SDValue IfFalse =
N->getOperand(2);
22847 cast<CondCodeSDNode>(N0.
getOperand(2))->get());
22860 EVT ResVT =
N->getValueType(0);
22872 "Scalar-SETCC feeding SELECT has unexpected result type!");
22882 if (SrcVT == MVT::i1 ||
22887 if (!ResVT.
isVector() || NumMaskElts == 0)
22918 return DAG.
getSelect(
DL, ResVT, Mask,
N->getOperand(1),
N->getOperand(2));
22923 EVT VT =
N->getValueType(0);
22946 SDValue EXTRACT_VEC_ELT =
N->getOperand(0);
22964 if (
N->getValueType(0) ==
N->getOperand(0).getValueType())
22965 return N->getOperand(0);
22976 auto *GN = cast<GlobalAddressSDNode>(
N);
22985 auto *
C = dyn_cast<ConstantSDNode>(
N->getOperand(0));
22987 C = dyn_cast<ConstantSDNode>(
N->getOperand(1));
22990 MinOffset = std::min(MinOffset,
C->getZExtValue());
23009 if (
Offset >= (1 << 20))
23014 if (!
T->isSized() ||
23028 !BR.getValueType().isScalarInteger())
23040 "This method is only for scalable vectors of offsets");
23056 unsigned ScalarSizeInBytes) {
23058 if (OffsetInBytes % ScalarSizeInBytes)
23062 if (OffsetInBytes / ScalarSizeInBytes > 31)
23076 unsigned ScalarSizeInBytes) {
23084 bool OnlyPackedOffsets =
true) {
23085 const SDValue Src =
N->getOperand(2);
23086 const EVT SrcVT = Src->getValueType(0);
23088 "Scatter stores are only possible for SVE vectors");
23100 if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64) &&
23103 ((SrcVT != MVT::nxv8f16) && (SrcVT != MVT::nxv8bf16))))
23132 Offset.getValueType().isVector())
23144 if (MVT::nxv4i32 ==
Base.getValueType().getSimpleVT().SimpleTy)
23154 if (!TLI.isTypeLegal(
Base.getValueType()))
23160 if (!OnlyPackedOffsets &&
23161 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
23164 if (!TLI.isTypeLegal(
Offset.getValueType()))
23180 if (Src.getValueType().isFloatingPoint())
23185 SDValue Ops[] = {
N->getOperand(0),
23192 return DAG.
getNode(Opcode,
DL, VTs, Ops);
23197 bool OnlyPackedOffsets =
true) {
23198 const EVT RetVT =
N->getValueType(0);
23200 "Gather loads are only possible for SVE vectors");
23235 Offset.getValueType().isVector())
23248 if (MVT::nxv4i32 ==
Base.getValueType().getSimpleVT().SimpleTy)
23262 if (!TLI.isTypeLegal(
Base.getValueType()))
23268 if (!OnlyPackedOffsets &&
23269 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
23283 SDValue Ops[] = {
N->getOperand(0),
23290 if (RetVT.
isInteger() && (RetVT != HwRetVt))
23306 unsigned Opc = Src->getOpcode();
23322 SDValue ExtOp = Src->getOperand(0);
23323 auto VT = cast<VTSDNode>(
N->getOperand(1))->getVT();
23327 assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
23328 "Sign extending from an invalid type");
23335 return DAG.
getNode(SOpc,
DL,
N->getValueType(0), Ext);
23347 unsigned MemVTOpNum = 4;
23410 EVT SignExtSrcVT = cast<VTSDNode>(
N->getOperand(1))->getVT();
23411 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
23413 if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
23416 EVT DstVT =
N->getValueType(0);
23420 for (
unsigned I = 0;
I < Src->getNumOperands(); ++
I)
23435 const unsigned OffsetPos = 4;
23439 if (
Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
23447 Ops[OffsetPos] =
Offset;
23458 unsigned ScalarSizeInBytes) {
23459 const unsigned ImmPos = 4, OffsetPos = 3;
23466 std::swap(Ops[ImmPos], Ops[OffsetPos]);
23470 Ops[1] = DAG.
getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index,
DL,
23479 switch (
Op.getOpcode()) {
23503 SDValue InsertVec =
N->getOperand(0);
23504 SDValue InsertElt =
N->getOperand(1);
23505 SDValue InsertIdx =
N->getOperand(2);
23546 EVT Ty =
N->getValueType(0);
23571 EVT VT =
N->getValueType(0);
23577 auto hasValidElementTypeForFPExtLoad = [](
EVT VT) {
23579 return EltVT == MVT::f32 || EltVT == MVT::f64;
23607 EVT VT =
N->getValueType(0);
23610 if (!VT.
isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
23626 EVT VT =
N->getValueType(0);
23628 SDValue Insert =
N->getOperand(0);
23632 if (!Insert.getOperand(0).isUndef())
23635 uint64_t IdxInsert = Insert.getConstantOperandVal(2);
23636 uint64_t IdxDupLane =
N->getConstantOperandVal(1);
23637 if (IdxInsert != 0 || IdxDupLane != 0)
23640 SDValue Bitcast = Insert.getOperand(1);
23644 SDValue Subvec = Bitcast.getOperand(0);
23654 DAG.
getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
23656 NewInsert,
N->getOperand(1));
23681 ExtractHigh =
LHS.getOperand(0);
23688 ExtractHigh =
RHS.getOperand(0);
23716 bool HasFoundMULLow =
true;
23718 if (ExtractHighSrcVec->
use_size() != 2)
23719 HasFoundMULLow =
false;
23728 HasFoundMULLow =
false;
23735 if (!ExtractLow || !ExtractLow->
hasOneUse())
23736 HasFoundMULLow =
false;
23739 if (HasFoundMULLow) {
23741 if (ExtractLowUser->
getOpcode() !=
N->getOpcode()) {
23742 HasFoundMULLow =
false;
23744 if (ExtractLowUser->
getOperand(0) == ExtractLow) {
23748 HasFoundMULLow =
false;
23753 HasFoundMULLow =
false;
23771 if (TruncHighOpVT != UZP1VT)
23773 if (TruncLowOpVT != UZP1VT)
23784 if (HasFoundMULLow) {
23823 EVT VT =
N->getValueType(0);
23824 if (VT != MVT::v1i64)
23841 UADDLV.getValueType() != MVT::v4i32 ||
23842 UADDLV.getOperand(0).getValueType() != MVT::v8i8)
23859 switch (
N->getOpcode()) {
24007 switch (
N->getConstantOperandVal(1)) {
24008 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
24010 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
24012 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
24014 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
24016 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
24017 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
24018 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
24019 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
24020 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
24021 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
24022 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
24023 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
24025 case Intrinsic::aarch64_neon_ld2:
24026 case Intrinsic::aarch64_neon_ld3:
24027 case Intrinsic::aarch64_neon_ld4:
24028 case Intrinsic::aarch64_neon_ld1x2:
24029 case Intrinsic::aarch64_neon_ld1x3:
24030 case Intrinsic::aarch64_neon_ld1x4:
24031 case Intrinsic::aarch64_neon_ld2lane:
24032 case Intrinsic::aarch64_neon_ld3lane:
24033 case Intrinsic::aarch64_neon_ld4lane:
24034 case Intrinsic::aarch64_neon_ld2r:
24035 case Intrinsic::aarch64_neon_ld3r:
24036 case Intrinsic::aarch64_neon_ld4r:
24037 case Intrinsic::aarch64_neon_st2:
24038 case Intrinsic::aarch64_neon_st3:
24039 case Intrinsic::aarch64_neon_st4:
24040 case Intrinsic::aarch64_neon_st1x2:
24041 case Intrinsic::aarch64_neon_st1x3:
24042 case Intrinsic::aarch64_neon_st1x4:
24043 case Intrinsic::aarch64_neon_st2lane:
24044 case Intrinsic::aarch64_neon_st3lane:
24045 case Intrinsic::aarch64_neon_st4lane:
24047 case Intrinsic::aarch64_sve_ldnt1:
24049 case Intrinsic::aarch64_sve_ld1rq:
24050 return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(
N, DAG);
24051 case Intrinsic::aarch64_sve_ld1ro:
24052 return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(
N, DAG);
24053 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
24055 case Intrinsic::aarch64_sve_ldnt1_gather:
24057 case Intrinsic::aarch64_sve_ldnt1_gather_index:
24060 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
24062 case Intrinsic::aarch64_sve_ld1:
24064 case Intrinsic::aarch64_sve_ldnf1:
24066 case Intrinsic::aarch64_sve_ldff1:
24068 case Intrinsic::aarch64_sve_st1:
24070 case Intrinsic::aarch64_sve_stnt1:
24072 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
24074 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
24076 case Intrinsic::aarch64_sve_stnt1_scatter:
24078 case Intrinsic::aarch64_sve_stnt1_scatter_index:
24080 case Intrinsic::aarch64_sve_ld1_gather:
24082 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
24083 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
24085 case Intrinsic::aarch64_sve_ld1q_gather_index:
24088 case Intrinsic::aarch64_sve_ld1_gather_index:
24091 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
24094 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
24097 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
24101 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
24105 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
24107 case Intrinsic::aarch64_sve_ldff1_gather:
24109 case Intrinsic::aarch64_sve_ldff1_gather_index:
24112 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
24116 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
24120 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
24124 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
24128 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
24131 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
24132 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
24134 case Intrinsic::aarch64_sve_st1q_scatter_index:
24136 case Intrinsic::aarch64_sve_st1_scatter:
24138 case Intrinsic::aarch64_sve_st1_scatter_index:
24140 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
24143 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
24146 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
24150 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
24154 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
24156 case Intrinsic::aarch64_rndr:
24157 case Intrinsic::aarch64_rndrrs: {
24158 unsigned IntrinsicID =
N->getConstantOperandVal(1);
24160 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
24161 : AArch64SysReg::RNDRRS);
24173 case Intrinsic::aarch64_sme_ldr_zt:
24175 DAG.
getVTList(MVT::Other),
N->getOperand(0),
24176 N->getOperand(2),
N->getOperand(3));
24177 case Intrinsic::aarch64_sme_str_zt:
24179 DAG.
getVTList(MVT::Other),
N->getOperand(0),
24180 N->getOperand(2),
N->getOperand(3));
24199bool AArch64TargetLowering::isUsedByReturnOnly(
SDNode *
N,
24201 if (
N->getNumValues() != 1)
24203 if (!
N->hasNUsesOfValue(1, 0))
24207 SDNode *Copy = *
N->use_begin();
24211 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
24214 TCChain = Copy->getOperand(0);
24218 bool HasRet =
false;
24236bool AArch64TargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
24244 if (!CstOffset || CstOffset->isZero())
24250 return isInt<9>(CstOffset->getSExtValue());
24253bool AArch64TargetLowering::getIndexedAddressParts(
SDNode *
N,
SDNode *
Op,
24261 SDNode *ValOnlyUser =
nullptr;
24264 if (UI.getUse().getResNo() == 1)
24266 if (ValOnlyUser ==
nullptr)
24269 ValOnlyUser =
nullptr;
24274 auto IsUndefOrZero = [](
SDValue V) {
24283 IsUndefOrZero(ValOnlyUser->
getOperand(2)))))
24286 Base =
Op->getOperand(0);
24290 int64_t RHSC =
RHS->getSExtValue();
24293 if (!isInt<9>(RHSC))
24310 VT =
LD->getMemoryVT();
24311 Ptr =
LD->getBasePtr();
24312 }
else if (
StoreSDNode *ST = dyn_cast<StoreSDNode>(
N)) {
24313 VT =
ST->getMemoryVT();
24314 Ptr =
ST->getBasePtr();
24318 if (!getIndexedAddressParts(
N,
Ptr.getNode(),
Base,
Offset, DAG))
24324bool AArch64TargetLowering::getPostIndexedAddressParts(
24330 VT =
LD->getMemoryVT();
24331 Ptr =
LD->getBasePtr();
24332 }
else if (
StoreSDNode *ST = dyn_cast<StoreSDNode>(
N)) {
24333 VT =
ST->getMemoryVT();
24334 Ptr =
ST->getBasePtr();
24353 EVT VT =
N->getValueType(0);
24354 [[maybe_unused]]
EVT SrcVT =
Op.getValueType();
24356 "Must be bool vector.");
24362 bool AllUndef =
true;
24364 AllUndef &=
Op.getOperand(
I).isUndef();
24367 Op =
Op.getOperand(0);
24381 EVT VT =
N->getValueType(0);
24391void AArch64TargetLowering::ReplaceBITCASTResults(
24395 EVT VT =
N->getValueType(0);
24396 EVT SrcVT =
Op.getValueType();
24398 if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
24403 if (VT == MVT::v4i8 && SrcVT == MVT::i32) {
24408 if (VT == MVT::v2i8 && SrcVT == MVT::i16) {
24415 "Expected fp->int bitcast!");
24434 if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
24446 EVT VT =
N->getValueType(0);
24449 !
N->getFlags().hasAllowReassociation()) ||
24450 (VT.
getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()))
24454 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(
N->getOperand(1));
24456 Shuf = dyn_cast<ShuffleVectorSDNode>(
N->getOperand(0));
24457 X =
N->getOperand(1);
24462 if (Shuf->getOperand(0) !=
X || !Shuf->getOperand(1)->isUndef())
24467 for (
int I = 0,
E = Mask.size();
I <
E;
I++)
24468 if (Mask[
I] != (
I % 2 == 0 ?
I + 1 :
I - 1))
24473 assert(LoHi.first.getValueType() == LoHi.second.getValueType());
24475 LoHi.first, LoHi.second);
24486 DAG.
getUNDEF(LoHi.first.getValueType())),
24493 unsigned AcrossOp) {
24504void AArch64TargetLowering::ReplaceExtractSubVectorResults(
24507 EVT InVT =
In.getValueType();
24514 EVT VT =
N->getValueType(0);
24523 auto *CIndex = dyn_cast<ConstantSDNode>(
N->getOperand(1));
24527 unsigned Index = CIndex->getZExtValue();
24540 SDLoc dl(V.getNode());
24541 auto [VLo, VHi] = DAG.
SplitScalar(V, dl, MVT::i64, MVT::i64);
24548 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
24550 DAG.
getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
24557 assert(
N->getValueType(0) == MVT::i128 &&
24558 "AtomicCmpSwap on types less than 128 should be legal");
24561 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
24572 switch (
MemOp->getMergedOrdering()) {
24574 Opcode = AArch64::CASPX;
24577 Opcode = AArch64::CASPAX;
24580 Opcode = AArch64::CASPLX;
24584 Opcode = AArch64::CASPALX;
24594 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
24608 switch (
MemOp->getMergedOrdering()) {
24610 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
24613 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
24616 Opcode = AArch64::CMP_SWAP_128_RELEASE;
24620 Opcode = AArch64::CMP_SWAP_128;
24627 auto Desired = DAG.
SplitScalar(
N->getOperand(2),
DL, MVT::i64, MVT::i64);
24628 auto New = DAG.
SplitScalar(
N->getOperand(3),
DL, MVT::i64, MVT::i64);
24629 SDValue Ops[] = {
N->getOperand(1), Desired.first, Desired.second,
24630 New.first, New.second,
N->getOperand(0)};
24632 Opcode,
SDLoc(
N), DAG.
getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
24648 "ATOMIC_LOAD_AND should be lowered to LDCLRP directly");
24654 switch (Ordering) {
24656 return AArch64::LDCLRP;
24659 return AArch64::LDCLRPA;
24662 return AArch64::LDCLRPL;
24666 return AArch64::LDCLRPAL;
24674 switch (Ordering) {
24676 return AArch64::LDSETP;
24679 return AArch64::LDSETPA;
24682 return AArch64::LDSETPL;
24686 return AArch64::LDSETPAL;
24694 switch (Ordering) {
24696 return AArch64::SWPP;
24699 return AArch64::SWPPA;
24702 return AArch64::SWPPL;
24706 return AArch64::SWPPAL;
24728 assert(
N->getValueType(0) == MVT::i128 &&
24729 "AtomicLoadXXX on types less than 128 should be legal");
24731 if (!Subtarget->hasLSE128())
24735 const SDValue &Chain =
N->getOperand(0);
24737 const SDValue &Val128 =
N->getOperand(2);
24738 std::pair<SDValue, SDValue> Val2x64 =
24741 const unsigned ISDOpcode =
N->getOpcode();
24742 const unsigned MachineOpcode =
24749 DAG.
getConstant(-1ULL, dl, MVT::i64), Val2x64.first);
24752 DAG.
getConstant(-1ULL, dl, MVT::i64), Val2x64.second);
24755 SDValue Ops[] = {Val2x64.first, Val2x64.second,
Ptr, Chain};
24761 DAG.
getVTList(MVT::i64, MVT::i64, MVT::Other), Ops);
24773void AArch64TargetLowering::ReplaceNodeResults(
24775 switch (
N->getOpcode()) {
24779 ReplaceBITCASTResults(
N,
Results, DAG);
24830 assert(
N->getValueType(0) == MVT::i128 &&
"unexpected illegal conversion");
24837 assert(
N->getValueType(0) != MVT::i128 &&
24838 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
24844 "Expected 128-bit atomicrmw.");
24864 DAG.
getVTList({MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
24865 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
24867 {LoadNode->getChain(), LoadNode->getBasePtr()},
24884 auto *AN = dyn_cast<AtomicSDNode>(LoadNode);
24885 bool isLoadAcquire =
24890 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
24894 {LoadNode->getChain(), LoadNode->getBasePtr()},
24901 Result.getValue(FirstRes),
Result.getValue(1 - FirstRes));
24907 ReplaceExtractSubVectorResults(
N,
Results, DAG);
24916 EVT VT =
N->getValueType(0);
24917 assert((VT == MVT::i8 || VT == MVT::i16) &&
24918 "custom lowering for unexpected type");
24925 case Intrinsic::aarch64_sve_clasta_n: {
24929 N->getOperand(1), Op2,
N->getOperand(3));
24933 case Intrinsic::aarch64_sve_clastb_n: {
24937 N->getOperand(1), Op2,
N->getOperand(3));
24941 case Intrinsic::aarch64_sve_lasta: {
24944 N->getOperand(1),
N->getOperand(2));
24948 case Intrinsic::aarch64_sve_lastb: {
24951 N->getOperand(1),
N->getOperand(2));
24959 assert(
N->getValueType(0) == MVT::i128 &&
24960 "READ_REGISTER custom lowering is only for 128-bit sysregs");
24962 SDValue SysRegName =
N->getOperand(1);
24966 Chain, SysRegName);
24985unsigned AArch64TargetLowering::combineRepeatedFPDivisors()
const {
24995 if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
25005 if (!Subtarget->hasLSE2())
25008 if (
auto LI = dyn_cast<LoadInst>(
I))
25009 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
25010 LI->getAlign() >=
Align(16);
25012 if (
auto SI = dyn_cast<StoreInst>(
I))
25013 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
25014 SI->getAlign() >=
Align(16);
25020 if (!Subtarget->hasLSE128())
25025 if (
const auto *SI = dyn_cast<StoreInst>(
I))
25026 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
25027 SI->getAlign() >=
Align(16) &&
25031 if (
const auto *RMW = dyn_cast<AtomicRMWInst>(
I))
25032 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
25033 RMW->getAlign() >=
Align(16) &&
25042 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
25045 if (
auto LI = dyn_cast<LoadInst>(
I))
25046 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
25047 LI->getAlign() >=
Align(16) &&
25050 if (
auto SI = dyn_cast<StoreInst>(
I))
25051 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
25052 SI->getAlign() >=
Align(16) &&
25078 switch (
I->getOpcode()) {
25081 case Instruction::AtomicCmpXchg:
25082 return cast<AtomicCmpXchgInst>(
I)->getSuccessOrdering() ==
25084 case Instruction::AtomicRMW:
25085 return cast<AtomicRMWInst>(
I)->getOrdering() ==
25087 case Instruction::Store:
25088 return cast<StoreInst>(
I)->getOrdering() ==
25098 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
25149 assert(
Size <= 128 &&
"AtomicExpandPass should've handled larger sizes.");
25154 bool CanUseLSE128 = Subtarget->hasLSE128() &&
Size == 128 &&
25164 if (Subtarget->hasLSE())
25166 if (Subtarget->outlineAtomics()) {
25189 Subtarget->hasLSE())
25199 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
25229 IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
25242 Type *Tys[] = {
Addr->getType() };
25244 IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
25274 IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
25284 IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
25285 Type *Tys[] = {
Addr->getType() };
25297 Attribute::ElementType, Val->
getType()));
25315bool AArch64TargetLowering::shouldNormalizeToSelectSequence(
LLVMContext &,
25349 M.getOrInsertGlobal(
"__security_cookie",
25359 F->addParamAttr(0, Attribute::AttrKind::InReg);
25369 return M.getGlobalVariable(
"__security_cookie");
25406 return Mask->getValue().isPowerOf2();
25412 unsigned OldShiftOpcode,
unsigned NewShiftOpcode,
25416 X, XC,
CC,
Y, OldShiftOpcode, NewShiftOpcode, DAG))
25419 return X.getValueType().isScalarInteger() || NewShiftOpcode ==
ISD::SHL;
25442 const MCPhysReg *IStart =
TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
25451 if (AArch64::GPR64RegClass.
contains(*
I))
25452 RC = &AArch64::GPR64RegClass;
25453 else if (AArch64::FPR64RegClass.
contains(*
I))
25454 RC = &AArch64::FPR64RegClass;
25464 assert(Entry->getParent()->getFunction().hasFnAttribute(
25465 Attribute::NoUnwind) &&
25466 "Function should be nounwind in insertCopiesSplitCSR!");
25467 Entry->addLiveIn(*
I);
25472 for (
auto *Exit : Exits)
25474 TII->get(TargetOpcode::COPY), *
I)
25487 bool OptSize = Attr.
hasFnAttr(Attribute::MinSize);
25500 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
25510 "Invalid call instruction for a KCFI check");
25512 switch (
MBBI->getOpcode()) {
25514 case AArch64::BLRNoIP:
25515 case AArch64::TCRETURNri:
25516 case AArch64::TCRETURNriBTI:
25523 assert(
Target.isReg() &&
"Invalid target operand for an indirect call");
25524 Target.setIsRenamable(
false);
25544void AArch64TargetLowering::finalizeLowering(
MachineFunction &MF)
const {
25572bool AArch64TargetLowering::shouldLocalize(
25574 auto &MF = *
MI.getMF();
25576 auto maxUses = [](
unsigned RematCost) {
25578 if (RematCost == 1)
25579 return std::numeric_limits<unsigned>::max();
25580 if (RematCost == 2)
25589 unsigned Opc =
MI.getOpcode();
25591 case TargetOpcode::G_GLOBAL_VALUE: {
25600 case TargetOpcode::G_FCONSTANT:
25601 case TargetOpcode::G_CONSTANT: {
25603 unsigned AdditionalCost = 0;
25605 if (Opc == TargetOpcode::G_CONSTANT)
25606 CI =
MI.getOperand(1).getCImm();
25608 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
25613 auto APF =
MI.getOperand(1).getFPImm()->getValueAPF();
25622 AdditionalCost = 1;
25630 RematCost += AdditionalCost;
25632 unsigned MaxUses = maxUses(RematCost);
25634 if (MaxUses == std::numeric_limits<unsigned>::max())
25636 return MRI.hasAtMostUserInstrs(Reg, MaxUses);
25640 case AArch64::ADRP:
25641 case AArch64::G_ADD_LOW:
25643 case TargetOpcode::G_PTR_ADD:
25659 if (
const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
25660 if (AI->getAllocatedType()->isScalableTy())
25665 if (
auto *
Base = dyn_cast<CallBase>(&Inst)) {
25668 if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
25669 CallerAttrs.requiresLazySave(CalleeAttrs))
25679 "Expected legal fixed length vector!");
25684 return EVT(MVT::nxv16i8);
25686 return EVT(MVT::nxv8i16);
25688 return EVT(MVT::nxv4i32);
25690 return EVT(MVT::nxv2i64);
25692 return EVT(MVT::nxv8f16);
25694 return EVT(MVT::nxv4f32);
25696 return EVT(MVT::nxv2f64);
25705 "Expected legal fixed length vector!");
25707 std::optional<unsigned> PgPattern =
25709 assert(PgPattern &&
"Unexpected element count for SVE predicate");
25716 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
25717 if (MaxSVESize && MinSVESize == MaxSVESize &&
25719 PgPattern = AArch64SVEPredPattern::all;
25726 MaskVT = MVT::nxv16i1;
25730 MaskVT = MVT::nxv8i1;
25734 MaskVT = MVT::nxv4i1;
25738 MaskVT = MVT::nxv2i1;
25742 return getPTrue(DAG,
DL, MaskVT, *PgPattern);
25748 "Expected legal scalable vector!");
25750 return getPTrue(DAG,
DL, PredTy, AArch64SVEPredPattern::all);
25763 "Expected to convert into a scalable vector!");
25764 assert(V.getValueType().isFixedLengthVector() &&
25765 "Expected a fixed length vector operand!");
25774 "Expected to convert into a fixed length vector!");
25775 assert(V.getValueType().isScalableVector() &&
25776 "Expected a scalable vector operand!");
25783SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
25785 auto Load = cast<LoadSDNode>(
Op);
25788 EVT VT =
Op.getValueType();
25790 EVT LoadVT = ContainerVT;
25791 EVT MemVT =
Load->getMemoryVT();
25801 LoadVT,
DL,
Load->getChain(),
Load->getBasePtr(),
Load->getOffset(), Pg,
25803 Load->getAddressingMode(),
Load->getExtensionType());
25808 Load->getMemoryVT().getVectorElementType());
25810 Result = getSVESafeBitCast(ExtendVT, Result, DAG);
25812 Pg, Result, DAG.
getUNDEF(ContainerVT));
25825 EVT InVT = Mask.getValueType();
25837 {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
25841SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
25843 auto Load = cast<MaskedLoadSDNode>(
Op);
25846 EVT VT =
Op.getValueType();
25854 "Incorrect mask type");
25860 bool IsPassThruZeroOrUndef =
false;
25862 if (
Load->getPassThru()->isUndef()) {
25863 PassThru = DAG.
getUNDEF(ContainerVT);
25864 IsPassThruZeroOrUndef =
true;
25871 IsPassThruZeroOrUndef =
true;
25875 ContainerVT,
DL,
Load->getChain(),
Load->getBasePtr(),
Load->getOffset(),
25876 Mask, PassThru,
Load->getMemoryVT(),
Load->getMemOperand(),
25877 Load->getAddressingMode(),
Load->getExtensionType());
25880 if (!IsPassThruZeroOrUndef) {
25892SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
25894 auto Store = cast<StoreSDNode>(
Op);
25897 EVT VT =
Store->getValue().getValueType();
25906 Store->getMemoryVT().getVectorElementType());
25920 Store->getBasePtr(),
Store->getOffset(), Pg, MemVT,
25921 Store->getMemOperand(),
Store->getAddressingMode(),
25922 Store->isTruncatingStore());
25925SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
25927 auto *
Store = cast<MaskedStoreSDNode>(
Op);
25930 EVT VT =
Store->getValue().getValueType();
25938 Mask,
Store->getMemoryVT(),
Store->getMemOperand(),
25939 Store->getAddressingMode(),
Store->isTruncatingStore());
25942SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
25945 EVT VT =
Op.getValueType();
25969 if (EltVT == MVT::i32 || EltVT == MVT::i64)
25970 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
25986 auto HalveAndExtendVector = [&DAG, &dl, &HalfVT, &PromVT,
25990 DAG.
getConstant(HalfVT.getVectorNumElements(), dl, MVT::i64);
25993 return std::pair<SDValue, SDValue>(
25994 {DAG.
getNode(ExtendOpcode, dl, PromVT,
Lo),
25995 DAG.
getNode(ExtendOpcode, dl, PromVT,
Hi)});
25999 auto [Op0LoExt, Op0HiExt] = HalveAndExtendVector(
Op.getOperand(0));
26000 auto [Op1LoExt, Op1HiExt] = HalveAndExtendVector(
Op.getOperand(1));
26008SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
26010 EVT VT =
Op.getValueType();
26026 Val = DAG.
getNode(ExtendOpc,
DL, MVT::nxv8i16, Val);
26031 Val = DAG.
getNode(ExtendOpc,
DL, MVT::nxv4i32, Val);
26036 Val = DAG.
getNode(ExtendOpc,
DL, MVT::nxv2i64, Val);
26044SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
26046 EVT VT =
Op.getValueType();
26080SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
26082 EVT VT =
Op.getValueType();
26083 EVT InVT =
Op.getOperand(0).getValueType();
26093SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
26095 EVT VT =
Op.getValueType();
26099 EVT InVT =
Op.getOperand(0).getValueType();
26104 Op.getOperand(1),
Op.getOperand(2));
26114 unsigned NewOp)
const {
26115 EVT VT =
Op.getValueType();
26125 for (
const SDValue &V :
Op->op_values()) {
26126 if (isa<CondCodeSDNode>(V)) {
26131 if (
const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
26139 "Expected only legal fixed-width types");
26153 for (
const SDValue &V :
Op->op_values()) {
26154 assert((!
V.getValueType().isVector() ||
26155 V.getValueType().isScalableVector()) &&
26156 "Only scalable vectors are supported!");
26171 EVT VT =
Op.getValueType();
26173 "Only expected to lower fixed length vector operation!");
26178 for (
const SDValue &V :
Op->op_values()) {
26179 assert(!isa<VTSDNode>(V) &&
"Unexpected VTSDNode node!");
26182 if (!
V.getValueType().isVector()) {
26188 assert(
V.getValueType().isFixedLengthVector() &&
26190 "Only fixed length vectors are supported!");
26194 auto ScalableRes = DAG.
getNode(
Op.getOpcode(),
SDLoc(
Op), ContainerVT, Ops);
26198SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(
SDValue ScalarOp,
26206 EVT ContainerVT = SrcVT;
26217 DAG.
getUNDEF(ContainerVT), AccOp, Zero);
26226SDValue AArch64TargetLowering::LowerPredReductionToSVE(
SDValue ReduceOp,
26230 EVT OpVT =
Op.getValueType();
26255 if (OpVT == MVT::nxv1i1) {
26269SDValue AArch64TargetLowering::LowerReductionToSVE(
unsigned Opcode,
26303AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(
SDValue Op,
26305 EVT VT =
Op.getValueType();
26308 EVT InVT =
Op.getOperand(1).getValueType();
26315 EVT MaskVT =
Op.getOperand(0).getValueType();
26327SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
26330 EVT InVT =
Op.getOperand(0).getValueType();
26334 "Only expected to lower fixed length vector operation!");
26336 "Expected integer result of the same bit length as the inputs!");
26344 {Pg, Op1, Op2,
Op.getOperand(2)});
26352AArch64TargetLowering::LowerFixedLengthBitcastToSVE(
SDValue Op,
26355 auto SrcOp =
Op.getOperand(0);
26356 EVT VT =
Op.getValueType();
26358 EVT ContainerSrcVT =
26366SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
26372 "Unexpected number of operands in CONCAT_VECTORS");
26374 auto SrcOp1 =
Op.getOperand(0);
26375 auto SrcOp2 =
Op.getOperand(1);
26376 EVT VT =
Op.getValueType();
26377 EVT SrcVT = SrcOp1.getValueType();
26379 if (NumOperands > 2) {
26382 for (
unsigned I = 0;
I < NumOperands;
I += 2)
26384 Op->getOperand(
I),
Op->getOperand(
I + 1)));
26401AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(
SDValue Op,
26403 EVT VT =
Op.getValueType();
26418 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
26420 Pg, Val, DAG.
getUNDEF(ContainerVT));
26426AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(
SDValue Op,
26428 EVT VT =
Op.getValueType();
26450AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(
SDValue Op,
26452 EVT VT =
Op.getValueType();
26475 Val = DAG.
getNode(Opcode,
DL, ContainerDstVT, Pg, Val,
26485 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
26494AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(
SDValue Op,
26497 EVT OpVT =
Op.getValueType();
26499 "Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
26510 EVT OpVT =
Op.getValueType();
26512 "Expected scalable vector in LowerVECTOR_INTERLEAVE.");
26522AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(
SDValue Op,
26524 EVT VT =
Op.getValueType();
26546 Val = getSVESafeBitCast(CvtVT, Val, DAG);
26547 Val = DAG.
getNode(Opcode,
DL, ContainerDstVT, Pg, Val,
26569 unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
26570 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
26574 if (!Subtarget.isNeonAvailable() && !MinSVESize)
26579 if (!IsSingleOp && (!Subtarget.hasSVE2() || MinSVESize != MaxSVESize))
26582 EVT VTOp1 =
Op.getOperand(0).getValueType();
26584 unsigned IndexLen = MinSVESize / BitsPerElt;
26587 assert(ElementsPerVectorReg <= IndexLen && ShuffleMask.
size() <= IndexLen &&
26588 "Incorrectly legalised shuffle operation");
26591 for (
int Index : ShuffleMask) {
26597 if ((
unsigned)
Index >= ElementsPerVectorReg)
26598 Index += IndexLen - ElementsPerVectorReg;
26602 if ((
unsigned)
Index >= MaxOffset)
26611 for (
unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i)
26627 else if (Subtarget.hasSVE2())
26630 DAG.
getConstant(Intrinsic::aarch64_sve_tbl2,
DL, MVT::i32),
26631 Op1, Op2, SVEMask);
26638SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
26640 EVT VT =
Op.getValueType();
26643 auto *SVN = cast<ShuffleVectorSDNode>(
Op.getNode());
26644 auto ShuffleMask = SVN->
getMask();
26654 auto MinLegalExtractEltScalarTy = [](
EVT ScalarTy) ->
EVT {
26655 if (ScalarTy == MVT::i8 || ScalarTy == MVT::i16)
26669 bool ReverseEXT =
false;
26671 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
26683 for (
unsigned LaneSize : {64U, 32U, 16U}) {
26684 if (
isREVMask(ShuffleMask, VT, LaneSize)) {
26691 else if (EltSz == 16)
26697 Op = LowerToPredicatedOp(
Op, DAG, RevOp);
26715 unsigned WhichResult;
26716 if (
isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
26720 if (
isTRNMask(ShuffleMask, VT, WhichResult)) {
26723 DAG, VT, DAG.
getNode(Opc,
DL, ContainerVT, Op1, Op2));
26733 DAG, VT, DAG.
getNode(Opc,
DL, ContainerVT, Op1, Op1));
26756 if (MinSVESize == MaxSVESize && MaxSVESize == VT.
getSizeInBits()) {
26763 if (
isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
26767 if (
isUZPMask(ShuffleMask, VT, WhichResult)) {
26770 DAG, VT, DAG.
getNode(Opc,
DL, ContainerVT, Op1, Op2));
26780 DAG, VT, DAG.
getNode(Opc,
DL, ContainerVT, Op1, Op1));
26797 EVT InVT =
Op.getValueType();
26801 "Only expect to cast between legal scalable vector types!");
26804 "For predicate bitcasts, use getSVEPredicateBitCast");
26820 VT == PackedVT || InVT == PackedInVT) &&
26821 "Unexpected bitcast!");
26824 if (InVT != PackedInVT)
26830 if (VT != PackedVT)
26838 return ::isAllActivePredicate(DAG,
N);
26842 return ::getPromotedVTForPredicate(VT);
26845bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
26847 const APInt &OriginalDemandedElts,
KnownBits &Known, TargetLoweringOpt &TLO,
26848 unsigned Depth)
const {
26850 unsigned Opc =
Op.getOpcode();
26867 if (ShiftRBits != ShiftLBits)
26870 unsigned ScalarSize =
Op.getScalarValueSizeInBits();
26871 assert(ScalarSize > ShiftLBits &&
"Invalid shift imm");
26874 APInt UnusedBits = ~OriginalDemandedBits;
26876 if ((ZeroBits & UnusedBits) != ZeroBits)
26886 if (!MaxSVEVectorSizeInBits)
26888 unsigned MaxElements = MaxSVEVectorSizeInBits / *ElementSize;
26904 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO,
Depth);
26907bool AArch64TargetLowering::isTargetCanonicalConstantNode(
SDValue Op)
const {
26916 return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
26917 Subtarget->hasComplxNum();
26922 auto *VTy = dyn_cast<VectorType>(Ty);
26928 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
26932 unsigned NumElements = VTy->getElementCount().getKnownMinValue();
26939 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
26943 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
26945 return 8 <= ScalarWidth && ScalarWidth <= 64;
26948 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
26949 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
26964 "Vector type must be either 64 or a power of 2 that is at least 128");
26966 if (TyWidth > 128) {
26969 auto *LowerSplitA =
B.CreateExtractVector(HalfTy, InputA,
B.getInt64(0));
26970 auto *LowerSplitB =
B.CreateExtractVector(HalfTy, InputB,
B.getInt64(0));
26971 auto *UpperSplitA =
26972 B.CreateExtractVector(HalfTy, InputA,
B.getInt64(Stride));
26973 auto *UpperSplitB =
26974 B.CreateExtractVector(HalfTy, InputB,
B.getInt64(Stride));
26975 Value *LowerSplitAcc =
nullptr;
26976 Value *UpperSplitAcc =
nullptr;
26978 LowerSplitAcc =
B.CreateExtractVector(HalfTy,
Accumulator,
B.getInt64(0));
26980 B.CreateExtractVector(HalfTy,
Accumulator,
B.getInt64(Stride));
26983 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
26985 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
26989 return B.CreateInsertVector(Ty, Result, UpperSplitInt,
B.getInt64(Stride));
26998 return B.CreateIntrinsic(
26999 Intrinsic::aarch64_sve_cmla_x, Ty,
27000 {
Accumulator, InputA, InputB,
B.getInt32((
int)Rotation * 90)});
27003 return B.CreateIntrinsic(
27004 Intrinsic::aarch64_sve_fcmla, Ty,
27005 {Mask,
Accumulator, InputA, InputB,
B.getInt32((
int)Rotation * 90)});
27008 Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,
27009 Intrinsic::aarch64_neon_vcmla_rot90,
27010 Intrinsic::aarch64_neon_vcmla_rot180,
27011 Intrinsic::aarch64_neon_vcmla_rot270};
27014 return B.CreateIntrinsic(IdMap[(
int)Rotation], Ty,
27023 return B.CreateIntrinsic(
27024 Intrinsic::aarch64_sve_cadd_x, Ty,
27025 {InputA, InputB,
B.getInt32((
int)Rotation * 90)});
27028 return B.CreateIntrinsic(
27029 Intrinsic::aarch64_sve_fcadd, Ty,
27030 {Mask, InputA, InputB,
B.getInt32((
int)Rotation * 90)});
27037 IntId = Intrinsic::aarch64_neon_vcadd_rot90;
27039 IntId = Intrinsic::aarch64_neon_vcadd_rot270;
27044 return B.CreateIntrinsic(IntId, Ty, {InputA, InputB});
27050bool AArch64TargetLowering::preferScalarizeSplat(
SDNode *
N)
const {
27051 unsigned Opc =
N->getOpcode();
27054 [&](
SDNode *
Use) { return Use->getOpcode() == ISD::MUL; }))
27060unsigned AArch64TargetLowering::getMinimumJumpTableEntries()
const {
27067 bool NonUnitFixedLengthVector =
27074 unsigned NumIntermediates;
27082 bool NonUnitFixedLengthVector =
27089 unsigned NumIntermediates;
27091 NumIntermediates, VT2);
27096 unsigned &NumIntermediates,
MVT &RegisterVT)
const {
27098 Context,
CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
27104 assert(IntermediateVT == RegisterVT &&
"Unexpected VT mismatch!");
27115 IntermediateVT = NewVT;
27118 return NumIntermediates;
27125 NumIntermediates *= NumSubRegs;
27126 NumRegs *= NumSubRegs;
27132 IntermediateVT = RegisterVT = MVT::v16i8;
27135 IntermediateVT = RegisterVT = MVT::v8i16;
27138 IntermediateVT = RegisterVT = MVT::v4i32;
27141 IntermediateVT = RegisterVT = MVT::v2i64;
27144 IntermediateVT = RegisterVT = MVT::v8f16;
27147 IntermediateVT = RegisterVT = MVT::v4f32;
27150 IntermediateVT = RegisterVT = MVT::v2f64;
27153 IntermediateVT = RegisterVT = MVT::v8bf16;
unsigned const MachineRegisterInfo * MRI
static unsigned MatchRegisterName(StringRef Name)
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static void CustomNonLegalBITCASTResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, EVT ExtendVT, EVT CastVT)
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG)
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &dl, SelectionDAG &DAG)
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG)
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG)
static SDValue performLastTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static std::optional< PredicateConstraint > parsePredicateConstraint(StringRef Constraint)
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static void analyzeCallOperands(const AArch64TargetLowering &TLI, const AArch64Subtarget *Subtarget, const TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo)
static std::optional< unsigned > IsSVECntIntrinsic(SDValue S)
static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG)
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one.
static bool isLegalArithImmed(uint64_t C)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static ScalableVectorType * getSVEContainerIRType(FixedVectorType *VTy)
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG)
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG)
static SDValue convertFixedMaskToScalableVector(SDValue Mask, SelectionDAG &DAG)
static bool shouldSinkVScale(Value *Op, SmallVectorImpl< Use * > &Ops)
We want to sink following cases: (add|sub|gep) A, ((mul|shl) vscale, imm); (add|sub|gep) A,...
static bool isZeroingInactiveLanes(SDValue Op)
static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG)
static bool isREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
static SDValue tryCombineMULLWithUZP1(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG)
static bool isZerosVector(const SDNode *N)
isZerosVector - Check whether SDNode N is a zero-filled vector.
static EVT tryGetOriginalBoolVectorType(SDValue Op, int Depth=0)
static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG)
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point divide by power of two into fixed-point to floating-point conversion.
static const TargetRegisterClass * getReducedGprRegisterClass(ReducedGprConstraint Constraint, EVT VT)
static SDValue carryFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG, bool Invert)
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset, SDLoc DL, unsigned BitWidth)
static bool isPredicateCCSettingOp(SDValue N)
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performSVEAndCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue overflowFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG)
static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2, ArrayRef< int > ShuffleMask, EVT VT, EVT ContainerVT, SelectionDAG &DAG)
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static MVT getSVEContainerType(EVT ContentTy)
static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG)
static bool isMergePassthruOpcode(unsigned Opc)
static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG, SDLoc DL, bool &IsMLA)
static SDValue performFADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates.
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SelectionDAG &DAG)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
static bool isCheapToExtend(const SDValue &N)
static cl::opt< bool > EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, cl::desc("Enable AArch64 logical imm instruction " "optimization"), cl::init(true))
static bool shouldSinkVectorOfPtrs(Value *Ptrs, SmallVectorImpl< Use * > &Ops)
static bool isUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG)
static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes, unsigned ScalarSizeInBytes)
Check if the value of OffsetInBytes can be used as an immediate for the gather load/prefetch and scat...
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v,...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static unsigned getDUPLANEOp(EVT EltType)
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static bool isTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget, const TargetMachine &TM)
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isExtendOrShiftOperand(SDValue N)
static bool isLanes1toNKnownZero(SDValue Op)
static bool setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI)
Set the IntrinsicInfo for the aarch64_sve_st<N> intrinsics.
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, SelectionDAG &DAG)
static SDValue performNVCASTCombine(SDNode *N)
Get rid of unnecessary NVCASTs (that don't change the type).
static EVT getPackedSVEVectorVT(EVT VT)
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG)
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performFlagSettingCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned GenericOpcode)
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static bool isCMP(SDValue Op)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static Function * getStructuredLoadFunction(Module *M, unsigned Factor, bool Scalable, Type *LDVTy, Type *PtrTy)
static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG)
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc, SelectionDAG &DAG, bool UnpredOp=false, bool SwapOperands=false)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated)
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian)
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT, unsigned Opcode, SelectionDAG &DAG)
static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy, bool IsLittleEndian)
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint)
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static const MCPhysReg GPRArgRegs[]
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG)
static SDValue performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isPassedInFPR(EVT VT)
static unsigned getIntrinsicID(const SDNode *N)
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert)
static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG)
static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N, SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool canEmitConjunction(const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
static bool isWideDUPMask(ArrayRef< int > M, EVT VT, unsigned BlockSize, unsigned &DupLaneOp)
Check if a vector shuffle corresponds to a DUP instruction with a larger element width than the vect...
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static cl::opt< bool > EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden, cl::desc("Combine ext and trunc to TBL"), cl::init(true))
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts)
static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST)
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static SDValue performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue removeRedundantInsertVectorElt(SDNode *N)
static std::optional< AArch64CC::CondCode > getCSETCondCode(SDValue Op)
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG)
Legalize the gather prefetch (scalar + vector addressing mode) when the offset vector is an unpacked ...
static bool isNegatedInteger(SDValue Op)
static SDValue performFirstTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static bool isLoadOrMultipleLoads(SDValue B, SmallVector< LoadSDNode * > &Loads)
static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc)
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16)
static Function * getStructuredStoreFunction(Module *M, unsigned Factor, bool Scalable, Type *STVTy, Type *PtrTy)
static SDValue performVectorShiftCombine(SDNode *N, const AArch64TargetLowering &TLI, TargetLowering::DAGCombinerInfo &DCI)
Optimize a vector shift instruction and its operand if shifted out bits are not used.
static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG)
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, unsigned ScalarSizeInBytes)
Combines a node carrying the intrinsic aarch64_sve_prf<T>_gather_scalar_offset into a node that uses ...
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
unsigned getSignExtendedGatherOpcode(unsigned Opcode)
static bool isOrXorChain(SDValue N, unsigned &Num, SmallVector< std::pair< SDValue, SDValue >, 16 > &WorkList)
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC)
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd)
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG)
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, unsigned NewOpc)
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSVEMulAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG)
static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG)
Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern into sext/zext(buildvecto...
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
static SDValue performAddSubIntoVectorOp(SDNode *N, SelectionDAG &DAG)
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
static const MCPhysReg FPRArgRegs[]
static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL, SelectionDAG &DAG)
Helper function to create 'CSET', which is equivalent to 'CSINC <Wd>, WZR, WZR, invert(<cond>)'.
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static void replaceBoolVectorBitcast(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern)
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
static std::optional< ReducedGprConstraint > parseReducedGprConstraint(StringRef Constraint)
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG)
Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) making use of the vector SExt/ZE...
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion.
static EVT calculatePreExtendType(SDValue Extend)
Calculates what the pre-extend type is, based on the extension operation node provided by Extend.
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG)
static EVT getPromotedVTForPredicate(EVT VT)
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X,...
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
static SDValue performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
static Value * UseTlsOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, AArch64CC::CondCode Cond)
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG)
static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG)
static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG)
static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG)
static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode, AtomicOrdering Ordering)
static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R.
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2)
Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
std::pair< SDValue, uint64_t > lookThroughSignExtension(SDValue Val)
bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL)
static SDValue performMSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale, SDLoc DL, SelectionDAG &DAG)
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG)
static SDValue performOrXorChainCombine(SDNode *N, SelectionDAG &DAG)
static bool isSplatShuffle(Value *V)
bool isHalvingTruncateOfLegalScalableType(EVT SrcVT, EVT DstVT)
static SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performAddCombineForShiftedOperands(SDNode *N, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode, bool IsSigned)
static bool isPackedVectorType(EVT VT, SelectionDAG &DAG)
Returns true if VT's elements occupy the lowest bit positions of its associated register class withou...
static bool isTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of "vector_shuffle v,...
static bool isZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performMaskedGatherScatterCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, bool IsSignaling)
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBuildVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64TargetLowering &TLI)
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG)
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static bool isOperandOfVmullHighP64(Value *Op)
Check if Op could be used with vmull_high_p64 intrinsic.
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
static SDValue performUADDVZextCombine(SDValue A, SelectionDAG &DAG)
static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG)
Perform the scalar expression combine in the form of: CSEL(c, 1, cc) + b => CSINC(b+c,...
static SDValue performCTLZCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static std::optional< uint64_t > getConstantLaneNumOfExtractHalfOperand(SDValue &Op)
static void ReplaceATOMIC_LOAD_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool areLoadedOffsetButOtherwiseSame(SDValue Op0, SDValue Op1, SelectionDAG &DAG, unsigned &NumSubLoads)
static SDValue performLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG, StoreSDNode *Store)
static bool isEssentiallyExtractHighSubvector(SDValue N)
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
static cl::opt< unsigned > MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden, cl::desc("Maximum of xors"))
static SDValue performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue performMULLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static const MVT MVT_CC
Value type used for condition codes.
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performReinterpretCastCombine(SDNode *N)
SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG)
static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsLess, bool IsEqual)
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N)
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &dl)
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Simplify Addr given that the top byte of it is ignored by HW during address translation.
static bool areExtractShuffleVectors(Value *Op1, Value *Op2, bool AllowSplat=false)
Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...
static bool isAllInactivePredicate(SDValue N)
static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, SDLoc DL, SelectionDAG &DAG)
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static cl::opt< bool > EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, cl::desc("Combine extends of AArch64 masked " "gather intrinsics"), cl::init(true))
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v,...
static SDValue performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isWideTypeMask(ArrayRef< int > M, EVT VT, SmallVectorImpl< int > &NewMask)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z, SelectionDAG &DAG)
static SDValue performANDSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static const TargetRegisterClass * getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT)
static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode, SDNode *AndNode, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex, unsigned CC)
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
#define FALKOR_STRIDED_ACCESS_MD
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
static const unsigned PerfectShuffleTable[6561+1]
static unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis Results
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static Function * getFunction(Constant *C)
static bool isSigned(unsigned int Opcode)
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
Module.h This file contains the declarations for the Module class.
This file defines ARC utility functions which are used by various parts of the compiler.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
PowerPC Reduce CR logical Operation
const char LLVMTargetMachineRef TM
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static const int BlockSize
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
unsigned getLazySaveTPIDR2Obj() const
bool branchTargetEnforcement() const
unsigned getVarArgsFPRSize() const
void setVarArgsStackOffset(unsigned Offset)
void setVarArgsStackIndex(int Index)
void setTailCallReservedStack(unsigned bytes)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setIsSplitCSR(bool s)
int getVarArgsFPRIndex() const
void incNumLocalDynamicTLSAccesses()
void setBytesInStackArgArea(unsigned bytes)
int getVarArgsStackIndex() const
void setVarArgsGPRIndex(int Index)
int getVarArgsGPRIndex() const
void setHasSwiftAsyncContext(bool HasContext)
void setPStateSMReg(Register Reg)
void setVarArgsFPRSize(unsigned Size)
unsigned getVarArgsStackOffset() const
void setLazySaveTPIDR2Obj(unsigned Reg)
unsigned getVarArgsGPRSize() const
unsigned getSRetReturnReg() const
Register getPStateSMReg() const
void setSRetReturnReg(unsigned Reg)
unsigned getBytesInStackArgArea() const
void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym)
void setVarArgsFPRIndex(int Index)
void setVarArgsGPRSize(unsigned Size)
void setArgumentStackToRestore(unsigned bytes)
void setHasStreamingModeChanges(bool HasChanges)
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const
bool isTargetWindows() const
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
const AArch64RegisterInfo * getRegisterInfo() const override
unsigned getMinimumJumpTableEntries() const
const AArch64InstrInfo * getInstrInfo() const override
const char * getSecurityCheckCookieName() const
unsigned getMaximumJumpTableSize() const
bool isTargetDarwin() const
bool isTargetILP32() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
bool hasSVE2orSME() const
unsigned classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const
Align getPrefLoopAlignment() const
Align getPrefFunctionAlignment() const
bool isTargetMachO() const
unsigned getMaxBytesForLoopAlignment() const
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it.
bool isTargetAndroid() const
const Triple & getTargetTriple() const
bool isCallingConvWin64(CallingConv::ID CC) const
const char * getChkStkName() const
bool useSVEForFixedLengthVectors() const
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool isLittleEndian() const
bool isWindowsArm64EC() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
bool isXRegisterReserved(size_t i) const
bool isTargetFuchsia() const
unsigned getMaxSVEVectorSizeInBits() const
unsigned getMinSVEVectorSizeInBits() const
bool hasCustomCallingConv() const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, SDValue Chain, SDValue InGlue, SDValue PStateSM, bool Entry) const
If a change in streaming mode is required on entry to/return from a function call it emits and return...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
EVT getPromotedVTForPredicate(EVT VT) const
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
unsigned getVaListSizeInBits(const DataLayout &DL) const override
Returns the size of the platform's va_list object.
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const override
Return the prefered common base offset.
bool shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert a trailing fence without reducing the ordering f...
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
MachineBasicBlock * EmitTileLoad(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool shouldRemoveRedundantExtend(SDValue Op) const override
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this functions.
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
MachineBasicBlock * EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB, bool HasTile) const
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool isOpSuitableForLSE128(const Instruction *I) const
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a ldN intrinsic.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
bool fallBackToDAGISel(const Instruction &Inst) const override
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
MachineBasicBlock * EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const override
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
MachineBasicBlock * EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist instruction in then/else to if.
bool isOpSuitableForRCPC3(const Instruction *I) const
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override
Return true if it is profitable to reduce a load to a smaller type.
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a stN intrinsic.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
MachineBasicBlock * EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, bool Op0IsDef) const
MachineBasicBlock * EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
Control the following reassociation of operands: (op (op x, c1), y) -> (op (op x, y),...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &FuncAttributes) const override
LLT returning variant.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool needsFixedCatchObjects() const override
Used for exception handling on Win64.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const override
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
bool isOpSuitableForLDPSTP(const Instruction *I) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldConsiderGEPOffsetSplit() const override
bool isVectorClearMaskLegal(ArrayRef< int > M, EVT VT) const override
Similar to isShuffleMaskLegal.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool enableAggressiveFMAFusion(EVT VT) const override
Enable aggressive FMA fusion on targets that want it.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
MachineBasicBlock * EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override
Return true if the @llvm.get.active.lane.mask intrinsic should be expanded using generic code in Sele...
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON=false) const
bool mergeStoresAfterLegalization(EVT VT) const override
SVE code generation for fixed length vectors does not custom lower BUILD_VECTOR.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
bool sle(const APInt &RHS) const
Signed less or equal comparison.
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isMask(unsigned numBits) const
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
bool isSignBitSet() const
Determine if sign bit of this APInt is set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
bool isOne() const
Determine if this is a value of 1.
int64_t getSExtValue() const
Get sign extended value.
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getCompareOperand()
an instruction that atomically reads a memory location, combines it with another value,...
@ Min
*p = old <signed v ? old : v
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
bool isFloatingPointOperation() const
BinOp getOperation() const
This is an SDNode representing atomic operations.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const BlockAddress * getBlockAddress() const
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantFPSDNode * getConstantFPSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant FP or null if this is not a constant FP splat.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2,...
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
const APInt & getValue() const
Return the constant as an APInt value reference.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
bool isLittleEndian() const
Layout endianness...
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Class to represent fixed width SIMD vectors.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
FunctionType * getFunctionType() const
Returns the FunctionType for me.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
int64_t getOffset() const
const GlobalValue * getGlobal() const
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Type * getValueType() const
Common base class shared among various IRBuilders.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
ConstantInt * getTrue()
Get the constant value for i1 true.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateFPToUI(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
BasicBlock * GetInsertBlock() const
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVMContext & getContext() const
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Value * getPointerOperand()
Type * getPointerOperandType() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static auto integer_fixedlen_vector_valuetypes()
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool isScalableVT() const
Return true if the type is a scalable type.
static auto all_valuetypes()
SimpleValueType Iteration.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto scalable_vector_valuetypes()
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
void setAdjustsStack(bool V)
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
@ SSPLK_None
Did not trigger a stack protector.
void setFrameAddressIsTaken(bool T)
int getStackProtectorIndex() const
Return the index for the stack protector object.
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
void setStackID(int ObjectIdx, uint8_t ID)
void setHasTailCall(bool V=true)
bool hasMustTailInVarArgFunc() const
Returns true if the function is variadic and contains a musttail call.
void setReturnAddressIsTaken(bool s)
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackProtectorIndex() const
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
This is a base class used to represent MGATHER and MSCATTER nodes.
const SDValue & getIndex() const
bool isIndexScaled() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
bool isIndexSigned() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
AtomicOrdering getMergedOrdering() const
Return a single atomic ordering that is at least as strong as both the success and failure orderings ...
const SDValue & getChain() const
bool isNonTemporal() const
bool isAtomic() const
Return true if the memory operation ordering is Unordered or higher.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
void setNode(SDNode *N)
set the SDNode
unsigned getOpcode() const
unsigned getNumOperands() const
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingInterface() const
Class to represent scalable SIMD vectors.
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const TargetSubtargetInfo & getSubtarget() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
void addCallSiteInfo(const SDNode *Node, CallSiteInfoImpl &&CallInfo)
Set CallSiteInfo to be associated with Node.
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provides VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
int getSplatIndex() const
ArrayRef< int > getMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
constexpr size_t size() const
size - Get the string size.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Class to represent struct types.
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const
Check whether or not MI needs to be moved close to its uses.
void setMaximumJumpTableSize(unsigned)
Indicate the maximum number of entries in jump tables.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool EnableExtLdPromotion
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
MVT getFrameIndexTy(const DataLayout &DL) const
Return the type for frame index, which is determined by the alloca address space specified through th...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
CodeModel::Model getCodeModel() const
Returns the code model.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned TLSSize
Bit size of immediate TLS offsets (0 == use the default).
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isArrayTy() const
True if this is an instance of ArrayType.
static Type * getBFloatTy(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
@ FloatTyID
32-bit floating point type
@ DoubleTyID
64-bit floating point type
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
static IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeID getTypeID() const
Return the type id for the type.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Base class of all SIMD vector types.
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getTruncatedElementVectorType(VectorType *VTy)
Type * getElementType() const
This class represents zero extension of integer types.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Type * getIndexedType() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
@ GLDFF1S_SXTW_MERGE_ZERO
@ GLDFF1_SCALED_MERGE_ZERO
@ GLD1_SXTW_SCALED_MERGE_ZERO
@ FP_EXTEND_MERGE_PASSTHRU
@ FP_ROUND_MERGE_PASSTHRU
@ GLDFF1_SXTW_SCALED_MERGE_ZERO
@ UINT_TO_FP_MERGE_PASSTHRU
@ FROUNDEVEN_MERGE_PASSTHRU
@ GLD1S_UXTW_SCALED_MERGE_ZERO
@ GLDNT1_INDEX_MERGE_ZERO
@ GLDFF1_UXTW_SCALED_MERGE_ZERO
@ FNEARBYINT_MERGE_PASSTHRU
@ GLDFF1S_SCALED_MERGE_ZERO
@ GLDFF1S_UXTW_SCALED_MERGE_ZERO
@ ZERO_EXTEND_INREG_MERGE_PASSTHRU
@ NVCAST
Natural vector cast.
@ BITREVERSE_MERGE_PASSTHRU
@ GLDFF1S_UXTW_MERGE_ZERO
@ SIGN_EXTEND_INREG_MERGE_PASSTHRU
@ GLDFF1S_SXTW_SCALED_MERGE_ZERO
@ GLD1S_SCALED_MERGE_ZERO
@ SINT_TO_FP_MERGE_PASSTHRU
@ GLD1_UXTW_SCALED_MERGE_ZERO
@ GLD1S_SXTW_SCALED_MERGE_ZERO
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
ArrayRef< MCPhysReg > getFPRArgRegs()
int getSMEPseudoMap(uint16_t Opcode)
static constexpr unsigned SVEMaxBitsPerVector
const unsigned RoundingBitsPos
static constexpr unsigned SVEBitsPerBlock
ArrayRef< MCPhysReg > getGPRArgRegs()
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ ARM64EC_Thunk_Native
Calling convention used in the ARM64EC ABI to implement calls between ARM64 code and thunks.
@ AArch64_VectorCall
Used between AArch64 Advanced SIMD functions.
@ Swift
Calling convention for Swift.
@ AArch64_SVE_VectorCall
Used between AArch64 SVE functions.
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
@ PreserveMost
Used for runtime calls that preserves most registers.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ CXX_FAST_TLS
Used for access functions.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
Preserve X0-X13, X19-X29, SP, Z0-Z31, P0-P15.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ ARM64EC_Thunk_X64
Calling convention used in the ARM64EC ABI to implement calls between x64 code and thunks.
@ C
The default llvm calling convention, compatible with C.
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SET_ROUNDING
Set rounding mode.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ UBSANTRAP
UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
@ SMULO
Same for multiplication.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ STRICT_FADD
Constrained versions of the binary floating point operators.
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isOverflowIntrOpRes(SDValue Op)
Returns true if the specified value is the overflow result from one of the overflow intrinsic nodes.
bool isExtOpcode(unsigned Opcode)
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVectorShrinkable(const SDNode *N, unsigned NewEltSize, bool Signed)
Returns true if the specified node is a vector where all elements can be truncated to the specified e...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
static const int LAST_INDEXED_MODE
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
CastInst_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_or< CastInst_match< OpTy, Instruction::ZExt >, CastInst_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
VScaleVal_match m_VScale()
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
CodeModel::Model getCodeModel()
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
bool hasAttachedCallOpBundle(const CallBase *CB)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< TypeSize > *Offsets, TypeSize StartingOffset)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool isPackedVectorType(EVT SomeVT)
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &DL, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
testing::Matcher< const detail::ErrorHolder & > Failed()
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
std::optional< unsigned > getSVEPredPatternFromNumElements(unsigned MinNumElts)
Return specific VL predicate pattern based on the number of elements.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool CC_AArch64_Arm64EC_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
unsigned M1(unsigned Val)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool CC_AArch64_Arm64EC_Thunk_Native(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
ComplexDeinterleavingOperation
bool CC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool RetCC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
CodeGenOptLevel
Code generation optimization level.
constexpr int PoisonMaskElem
AtomicOrdering
Atomic ordering for LLVM's memory model.
ComplexDeinterleavingRotation
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
gep_type_iterator gep_type_begin(const User *GEP)
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C.begin(), C.end(), pred), C.end());
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
static const MachineMemOperand::Flags MOStridedAccess
bool CC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper structure to keep track of a SET_CC lowered into AArch64 code.
Helper structure to keep track of ISD::SET_CC operands.
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
Helper structure to be able to read SetCC information.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Represent subnormal handling kind for floating point instruction inputs and outputs.
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
uint64_t getScalarStoreSize() const
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
bool isScalableVT() const
Return true if the type is a scalable type.
bool isFixedLengthVector() const
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type that is chosen by the caller.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Describes a register that needs to be forwarded from the prologue to a musttail call.
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned getBitWidth() const
Get the bit width of this value.
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false)
Compute known bits for lshr(LHS, RHS).
static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false)
Compute known bits for ashr(LHS, RHS).
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Structure used to represent pair of argument number after call lowering and register used to transfer...
This class contains a discriminated union of information about pointers in memory operands, relating them back to the LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op is the value the constraint applies to.
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
A MapVector that performs no allocations if smaller than a certain size.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null, there is no BaseGV.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::OutputArg, 32 > Outs
bool isBeforeLegalizeOps() const
bool isAfterLegalizeDAG() const
bool isCalledByLegalizer() const
bool isBeforeLegalize() const
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.
bool CombineTo(SDValue O, SDValue N)
Helper structure to keep track of SetCC information.